rsssf 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +2 -0
- data/Manifest.txt +3 -6
- data/README.md +43 -26
- data/Rakefile +8 -7
- data/lib/rsssf/convert.rb +495 -0
- data/lib/rsssf/download.rb +151 -0
- data/lib/rsssf/page.rb +70 -45
- data/lib/rsssf/repo.rb +77 -153
- data/lib/rsssf/reports/page.rb +30 -19
- data/lib/rsssf/reports/schedule.rb +111 -25
- data/lib/rsssf/schedule.rb +4 -14
- data/lib/rsssf/utils.rb +10 -29
- data/lib/rsssf/version.rb +3 -5
- data/lib/rsssf.rb +42 -19
- metadata +26 -25
- data/.gemtest +0 -0
- data/lib/rsssf/fetch.rb +0 -80
- data/lib/rsssf/html2txt.rb +0 -157
- data/lib/rsssf/patch.rb +0 -28
- data/test/helper.rb +0 -12
- data/test/test_utils.rb +0 -83
data/lib/rsssf/reports/page.rb
CHANGED
@@ -1,25 +1,31 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
|
4
3
|
module Rsssf
|
5
4
|
|
6
5
|
class PageReport
|
7
6
|
|
7
|
+
|
8
|
+
def self.build( files, title: )
|
9
|
+
stats = []
|
10
|
+
files.each do |file|
|
11
|
+
page = Page.read_txt( file )
|
12
|
+
stats << page.build_stat
|
13
|
+
end
|
14
|
+
|
15
|
+
new( stats, title: title )
|
16
|
+
end
|
17
|
+
|
18
|
+
|
8
19
|
attr_reader :title
|
9
20
|
|
10
|
-
def initialize( stats,
|
21
|
+
def initialize( stats, title: )
|
11
22
|
@stats = stats
|
12
|
-
@
|
13
|
-
|
14
|
-
@title = opts[:title] || 'Your Title Here'
|
23
|
+
@title = title
|
15
24
|
end
|
16
25
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
f.write build_summary
|
21
|
-
end
|
22
|
-
end
|
26
|
+
### save report as README.md in repo
|
27
|
+
def save( path ) write_text( path, build_summary ); end
|
28
|
+
|
23
29
|
|
24
30
|
def build_summary
|
25
31
|
|
@@ -33,19 +39,26 @@ def build_summary
|
|
33
39
|
|
34
40
|
football.db RSSSF Archive Data Summary for #{title}
|
35
41
|
|
36
|
-
_Last Update: #{Time.now}_
|
37
|
-
|
38
42
|
EOS
|
39
43
|
|
44
|
+
## no longer add last update
|
45
|
+
## _Last Update: #{Time.now}_
|
46
|
+
|
47
|
+
|
40
48
|
txt = ''
|
41
49
|
txt << header
|
42
50
|
|
43
|
-
txt << "|
|
44
|
-
txt << "| :----- |
|
51
|
+
txt << "| File | Authors | Last Updated | Lines (Chars) | Sections |\n"
|
52
|
+
txt << "| :----- | :------- | :----------- | ------------: | :------- |\n"
|
53
|
+
|
54
|
+
## note - removed season (no longer tracked here)
|
45
55
|
|
46
56
|
stats.each do |stat|
|
47
|
-
|
48
|
-
|
57
|
+
## get basename from source url
|
58
|
+
url_path = URI.parse( stat.source ).path
|
59
|
+
basename = File.basename( url_path, File.extname( url_path ) ) ## e.g. duit92.txt or duit92.html => duit92
|
60
|
+
|
61
|
+
txt << "| [#{basename}.txt](#{basename}.txt) "
|
49
62
|
txt << "| #{stat.authors} "
|
50
63
|
txt << "| #{stat.last_updated} "
|
51
64
|
txt << "| #{stat.line_count} (#{stat.char_count}) "
|
@@ -60,5 +73,3 @@ end # method build_summary
|
|
60
73
|
end ## class PageReport
|
61
74
|
end ## module Rsssf
|
62
75
|
|
63
|
-
## add (shortcut) alias
|
64
|
-
RsssfPageReport = Rsssf::PageReport
|
@@ -1,30 +1,72 @@
|
|
1
|
-
|
1
|
+
|
2
2
|
|
3
3
|
module Rsssf
|
4
|
-
|
4
|
+
|
5
|
+
|
6
|
+
ScheduleStat = Struct.new(
|
7
|
+
:path, ## path to .txt file
|
8
|
+
:errors ## array or nil
|
9
|
+
)
|
10
|
+
|
11
|
+
|
12
|
+
|
5
13
|
class ScheduleReport
|
6
14
|
|
7
|
-
|
15
|
+
include Utils ## e.g. year_from_file, etc.
|
16
|
+
|
17
|
+
##
|
18
|
+
## quick hack? pass along (optional) patch
|
19
|
+
|
20
|
+
def self.build( files, title:,
|
21
|
+
patch: nil )
|
22
|
+
linter = Parser::Linter.new
|
8
23
|
|
9
|
-
|
10
|
-
|
11
|
-
|
24
|
+
stats = []
|
25
|
+
files.each_with_index do |file,i|
|
26
|
+
|
27
|
+
puts "==> [#{i+1}/#{files.size}] reading >#{file}<..."
|
28
|
+
|
29
|
+
txt = read_text( file )
|
30
|
+
|
31
|
+
if patch && patch.respond_to?(:on_parse)
|
32
|
+
season_dir = File.basename(File.dirname(file))
|
33
|
+
season = Season( season_dir )
|
34
|
+
basename = File.basename(file, File.extname(file))
|
35
|
+
puts " [debug] before patch.on_parse #{basename}, #{season}"
|
36
|
+
txt = patch.on_parse( txt, basename, season )
|
37
|
+
end
|
38
|
+
|
39
|
+
linter.parse( txt, parse: true,
|
40
|
+
path: file ) ## todo/fix - change path to file/filename - why? why not?
|
12
41
|
|
13
|
-
|
42
|
+
stat = ScheduleStat.new
|
43
|
+
stat.path = file
|
44
|
+
stat.errors = linter.errors
|
45
|
+
|
46
|
+
stats << stat
|
47
|
+
end
|
48
|
+
|
49
|
+
new( stats, title: title )
|
14
50
|
end
|
15
51
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
52
|
+
|
53
|
+
attr_reader :title
|
54
|
+
|
55
|
+
def initialize( stats, title: )
|
56
|
+
@stats = stats
|
57
|
+
@title = title
|
21
58
|
end
|
22
59
|
|
60
|
+
### save report as README.md in repo
|
61
|
+
def save( path ) write_text( path, build_summary ); end
|
62
|
+
|
63
|
+
|
23
64
|
def build_summary
|
24
|
-
## sort start by season (latest first) than
|
65
|
+
## sort 1) by season (latest first), then
|
66
|
+
## 2) by name (e.g. 1-bundesliga, cup, etc.)
|
25
67
|
stats = @stats.sort do |l,r|
|
26
|
-
v = r.
|
27
|
-
v = l.
|
68
|
+
v = File.basename(File.dirname(r.path)) <=> File.basename(File.dirname(l.path))
|
69
|
+
v = File.basename(l.path) <=> File.basename(r.path) if v == 0 ## same season
|
28
70
|
v
|
29
71
|
end
|
30
72
|
|
@@ -35,11 +77,14 @@ def build_summary
|
|
35
77
|
football.db RSSSF (Rec.Sport.Soccer Statistics Foundation) Archive Data for
|
36
78
|
#{title}
|
37
79
|
|
38
|
-
_Last Update: #{Time.now}_
|
39
|
-
|
40
80
|
EOS
|
41
81
|
|
82
|
+
## no longer add last update
|
83
|
+
## _Last Update: #{Time.now}_
|
84
|
+
##
|
42
85
|
|
86
|
+
|
87
|
+
=begin
|
43
88
|
footer =<<EOS
|
44
89
|
|
45
90
|
## Questions? Comments?
|
@@ -48,30 +93,71 @@ Send them along to the
|
|
48
93
|
[Open Sports & Friends Forum](http://groups.google.com/group/opensport).
|
49
94
|
Thanks!
|
50
95
|
EOS
|
96
|
+
=end
|
97
|
+
|
51
98
|
|
99
|
+
errors = []
|
52
100
|
|
53
|
-
|
101
|
+
|
102
|
+
txt = String.new
|
54
103
|
txt << header
|
55
104
|
|
56
|
-
txt << "| Season | League, Cup |
|
105
|
+
txt << "| Season | League, Cup | Errors |\n"
|
57
106
|
txt << "| :----- | :---------- | -----: |\n"
|
58
107
|
|
108
|
+
|
109
|
+
stats.each_with_index do |stat,i|
|
110
|
+
|
111
|
+
path = stat.path
|
112
|
+
season_dir = File.basename(File.dirname( path ))
|
113
|
+
filename = File.basename( path ) ## incl. extension !!
|
114
|
+
|
115
|
+
season = Season( season_dir )
|
116
|
+
## note - use archive_dir_for_season for archive path
|
117
|
+
|
118
|
+
|
119
|
+
txt << "| #{season_dir} "
|
120
|
+
txt << "| [#{filename}](#{archive_dir_for_season(season)}/#{filename}) "
|
121
|
+
|
122
|
+
txt << if stat.errors.size > 0
|
123
|
+
"| **!! #{stat.errors.size}** "
|
124
|
+
else
|
125
|
+
"| OK "
|
126
|
+
end
|
127
|
+
txt << "|\n"
|
128
|
+
|
129
|
+
errors += stat.errors if stat.errors.size > 0
|
130
|
+
end
|
131
|
+
|
132
|
+
if errors.size > 0
|
133
|
+
txt << "\n\n"
|
134
|
+
txt << "#{errors.size} errors in #{stats.size} datafile(s)\n\n"
|
135
|
+
|
136
|
+
txt << "```\n"
|
137
|
+
errors.each do |path, msg, line|
|
138
|
+
season_dir = File.basename(File.dirname( path ))
|
139
|
+
filename = File.basename( path ) ## incl. extension !!
|
140
|
+
|
141
|
+
txt <<"#{season_dir}/#{filename} -- #{msg}\n"
|
142
|
+
txt << " in line >#{line}<\n" unless line.empty?
|
143
|
+
end
|
144
|
+
txt << "```\n"
|
145
|
+
end
|
146
|
+
|
147
|
+
=begin
|
59
148
|
stats.each do |stat|
|
60
149
|
txt << "| #{stat.season} "
|
61
150
|
txt << "| [#{stat.filename}](#{stat.path}/#{stat.filename}) "
|
62
151
|
txt << "| #{stat.rounds} "
|
63
152
|
txt << "|\n"
|
64
153
|
end
|
154
|
+
=end
|
65
155
|
|
66
|
-
|
67
|
-
|
68
|
-
txt << footer
|
156
|
+
|
157
|
+
## txt << footer
|
69
158
|
txt
|
70
159
|
end # method build_summary
|
71
160
|
|
72
161
|
end ## class ScheduleReport
|
73
162
|
end ## module Rsssf
|
74
163
|
|
75
|
-
## add (shortcut) alias
|
76
|
-
RsssfScheduleReport = Rsssf::ScheduleReport
|
77
|
-
|
data/lib/rsssf/schedule.rb
CHANGED
@@ -1,31 +1,21 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module Rsssf
|
4
3
|
|
5
4
|
class Schedule
|
6
5
|
|
7
|
-
def self.from_string( txt )
|
8
|
-
self.new( txt )
|
9
|
-
end
|
10
6
|
|
11
|
-
attr_accessor :rounds # track no of rounds
|
7
|
+
# attr_accessor :rounds # track no of rounds - why? why not?
|
12
8
|
|
13
9
|
def initialize( txt )
|
14
10
|
@txt = txt
|
15
11
|
|
16
|
-
@rounds = nil # undefined
|
12
|
+
## @rounds = nil # undefined
|
17
13
|
end
|
18
14
|
|
19
15
|
|
20
|
-
def save( path )
|
21
|
-
|
22
|
-
f.write @txt
|
23
|
-
end
|
16
|
+
def save( path, header: )
|
17
|
+
write_text( path, header + @txt )
|
24
18
|
end
|
25
19
|
|
26
20
|
end ## class Schedule
|
27
21
|
end ## module Rsssf
|
28
|
-
|
29
|
-
## add (shortcut) alias
|
30
|
-
RsssfSchedule = Rsssf::Schedule
|
31
|
-
|
data/lib/rsssf/utils.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module Rsssf
|
4
3
|
module Utils
|
5
4
|
|
5
|
+
|
6
|
+
## move to Page - why? why not?
|
7
|
+
|
6
8
|
def year_from_file( path )
|
7
9
|
extname = File.extname( path )
|
8
10
|
basename = File.basename( path, extname ) ## e.g. duit92.txt or duit92.html => duit92
|
@@ -32,44 +34,23 @@ def year_from_name( name )
|
|
32
34
|
end # method year_from_name
|
33
35
|
|
34
36
|
|
35
|
-
def year_to_season( year )
|
36
|
-
|
37
|
-
## todo: require four digit years? why? why not??
|
38
|
-
|
39
|
-
## e.g. 64 => 1963-64
|
40
|
-
## 2011 => 2010-11 etc.
|
41
|
-
|
42
|
-
if year <= 16 ## assume 20xx for now from 00..16
|
43
|
-
year += 2000
|
44
|
-
elsif year <= 99
|
45
|
-
year += 1900
|
46
|
-
else
|
47
|
-
# use as is; assume four digit year
|
48
|
-
end
|
49
37
|
|
50
|
-
|
38
|
+
def archive_dir_for_season( season )
|
39
|
+
season = Season( season )
|
51
40
|
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
def archive_dir_for_year( year )
|
57
|
-
season = year_to_season( year )
|
58
|
-
if year <= 2010 # e.g. season 2009-10
|
41
|
+
if season < Season('2010') # e.g. season 2009-10
|
59
42
|
## use archive folder (w/ 1980s etc)
|
60
43
|
## get decade folder
|
61
|
-
decade =
|
44
|
+
decade = season.start_year ## 1999/2000 2000
|
62
45
|
decade -= decade % 10 ## turn 1987 into 1980 etc
|
63
|
-
"archive/#{decade}s/#{season}"
|
46
|
+
"archive/#{decade}s/#{season.to_path}"
|
64
47
|
else
|
65
|
-
season
|
48
|
+
season.to_path
|
66
49
|
end
|
67
50
|
end
|
68
51
|
|
69
52
|
|
53
|
+
|
70
54
|
end # module Utils
|
71
55
|
end # module Rsssf
|
72
56
|
|
73
|
-
## add (shortcut) alias
|
74
|
-
RsssfUtils = Rsssf::Utils
|
75
|
-
|
data/lib/rsssf/version.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
|
4
2
|
module Rsssf
|
5
3
|
|
6
4
|
MAJOR = 0
|
7
|
-
MINOR =
|
5
|
+
MINOR = 2
|
8
6
|
PATCH = 0
|
9
7
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
10
8
|
|
@@ -13,11 +11,11 @@ module Rsssf
|
|
13
11
|
end
|
14
12
|
|
15
13
|
def self.banner
|
16
|
-
"rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
14
|
+
"rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
|
17
15
|
end
|
18
16
|
|
19
17
|
def self.root
|
20
|
-
|
18
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
21
19
|
end
|
22
20
|
|
23
21
|
end # module Rsssf
|
data/lib/rsssf.rb
CHANGED
@@ -1,34 +1,57 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
|
-
##
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
|
2
|
+
## 3rd party (our own)
|
3
|
+
require 'season/formats' ## add season support
|
4
|
+
require 'webget' ## incl. webget, webcache, webclient, etc.
|
5
|
+
|
6
|
+
require 'cocos'
|
7
|
+
|
8
|
+
|
9
|
+
## (old) 3rd party libs
|
10
|
+
## require 'textutils' ## used for File.read_utf8 etc.
|
11
|
+
## require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
|
12
|
+
|
13
|
+
|
14
|
+
#######
|
15
|
+
## add RsssfParser too
|
16
|
+
require 'rsssf/parser' ## from rsssf-parser gem
|
7
17
|
|
8
18
|
|
9
|
-
## 3rd party libs
|
10
|
-
require 'textutils' ## used for File.read_utf8 etc.
|
11
|
-
require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
|
12
19
|
|
13
20
|
|
14
21
|
## our own code
|
15
|
-
|
22
|
+
require_relative 'rsssf/version' # note: let version always go first
|
23
|
+
|
24
|
+
require_relative 'rsssf/utils' # include Utils - goes first
|
25
|
+
|
26
|
+
require_relative 'rsssf/download'
|
27
|
+
|
28
|
+
require_relative 'rsssf/convert'
|
29
|
+
require_relative 'rsssf/page'
|
30
|
+
require_relative 'rsssf/schedule'
|
31
|
+
|
32
|
+
require_relative 'rsssf/reports/schedule'
|
33
|
+
require_relative 'rsssf/reports/page'
|
34
|
+
|
35
|
+
require_relative 'rsssf/repo'
|
36
|
+
|
16
37
|
|
17
|
-
require 'rsssf/utils' # include Utils - goes first
|
18
|
-
require 'rsssf/html2txt' # include Filters - goes first
|
19
38
|
|
20
|
-
require 'rsssf/fetch'
|
21
|
-
require 'rsssf/page'
|
22
|
-
require 'rsssf/schedule'
|
23
|
-
require 'rsssf/patch'
|
24
39
|
|
25
|
-
|
26
|
-
|
40
|
+
#############
|
41
|
+
## add (shortcut) alias(es)
|
42
|
+
RsssfPage = Rsssf::Page
|
43
|
+
RsssfPageConverter = Rsssf::PageConverter
|
44
|
+
RsssfPageStat = Rsssf::PageStat
|
45
|
+
RsssfPageReport = Rsssf::PageReport
|
27
46
|
|
28
|
-
|
47
|
+
RsssfSchedule = Rsssf::Schedule
|
48
|
+
RsssfScheduleStat = Rsssf::ScheduleStat
|
49
|
+
RsssfScheduleReport = Rsssf::ScheduleReport
|
29
50
|
|
51
|
+
RsssfRepo = Rsssf::Repo
|
52
|
+
RsssfUtils = Rsssf::Utils
|
30
53
|
|
31
54
|
|
32
55
|
|
33
56
|
## say hello
|
34
|
-
puts Rsssf.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
57
|
+
puts Rsssf.banner ## if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rsssf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: cocos
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: season-formats
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: rsssf-parser
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -56,63 +56,65 @@ dependencies:
|
|
56
56
|
name: rdoc
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '4.0'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '7'
|
62
65
|
type: :development
|
63
66
|
prerelease: false
|
64
67
|
version_requirements: !ruby/object:Gem::Requirement
|
65
68
|
requirements:
|
66
|
-
- - "
|
69
|
+
- - ">="
|
67
70
|
- !ruby/object:Gem::Version
|
68
71
|
version: '4.0'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '7'
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
76
|
name: hoe
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
72
78
|
requirements:
|
73
79
|
- - "~>"
|
74
80
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
81
|
+
version: '4.1'
|
76
82
|
type: :development
|
77
83
|
prerelease: false
|
78
84
|
version_requirements: !ruby/object:Gem::Requirement
|
79
85
|
requirements:
|
80
86
|
- - "~>"
|
81
87
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
88
|
+
version: '4.1'
|
83
89
|
description: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
|
84
90
|
archive data
|
85
|
-
email:
|
91
|
+
email: gerald.bauer@gmail.com
|
86
92
|
executables: []
|
87
93
|
extensions: []
|
88
94
|
extra_rdoc_files:
|
89
|
-
-
|
95
|
+
- CHANGELOG.md
|
90
96
|
- Manifest.txt
|
91
97
|
- README.md
|
92
98
|
files:
|
93
|
-
-
|
94
|
-
- HISTORY.md
|
99
|
+
- CHANGELOG.md
|
95
100
|
- Manifest.txt
|
96
101
|
- README.md
|
97
102
|
- Rakefile
|
98
103
|
- lib/rsssf.rb
|
99
|
-
- lib/rsssf/
|
100
|
-
- lib/rsssf/
|
104
|
+
- lib/rsssf/convert.rb
|
105
|
+
- lib/rsssf/download.rb
|
101
106
|
- lib/rsssf/page.rb
|
102
|
-
- lib/rsssf/patch.rb
|
103
107
|
- lib/rsssf/repo.rb
|
104
108
|
- lib/rsssf/reports/page.rb
|
105
109
|
- lib/rsssf/reports/schedule.rb
|
106
110
|
- lib/rsssf/schedule.rb
|
107
111
|
- lib/rsssf/utils.rb
|
108
112
|
- lib/rsssf/version.rb
|
109
|
-
|
110
|
-
- test/test_utils.rb
|
111
|
-
homepage: https://github.com/sportdb/rsssf
|
113
|
+
homepage: https://github.com/sportdb/sport.db.sources
|
112
114
|
licenses:
|
113
115
|
- Public Domain
|
114
116
|
metadata: {}
|
115
|
-
post_install_message:
|
117
|
+
post_install_message:
|
116
118
|
rdoc_options:
|
117
119
|
- "--main"
|
118
120
|
- README.md
|
@@ -122,16 +124,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
124
|
requirements:
|
123
125
|
- - ">="
|
124
126
|
- !ruby/object:Gem::Version
|
125
|
-
version:
|
127
|
+
version: 2.2.2
|
126
128
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
129
|
requirements:
|
128
130
|
- - ">="
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
|
133
|
-
|
134
|
-
signing_key:
|
134
|
+
rubygems_version: 3.4.10
|
135
|
+
signing_key:
|
135
136
|
specification_version: 4
|
136
137
|
summary: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
|
137
138
|
archive data
|
data/.gemtest
DELETED
File without changes
|
data/lib/rsssf/fetch.rb
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Rsssf
|
4
|
-
|
5
|
-
class PageFetcher
|
6
|
-
|
7
|
-
include Filters # e.g. html2text, sanitize etc.
|
8
|
-
|
9
|
-
|
10
|
-
def initialize
|
11
|
-
@worker = Fetcher::Worker.new
|
12
|
-
end
|
13
|
-
|
14
|
-
def fetch( src_url )
|
15
|
-
|
16
|
-
## note: assume plain 7-bit ascii for now
|
17
|
-
## -- assume rsssf uses ISO_8859_15 (updated version of ISO_8859_1) -- does NOT use utf-8 character encoding!!!
|
18
|
-
html = @worker.read( src_url )
|
19
|
-
|
20
|
-
### todo/fix: first check if html is all ascii-7bit e.g.
|
21
|
-
## includes only chars from 64 to 127!!!
|
22
|
-
|
23
|
-
## normalize newlines
|
24
|
-
## remove \r (carriage return) used by Windows; just use \n (new line)
|
25
|
-
html = html.gsub( "\r", '' )
|
26
|
-
|
27
|
-
## note:
|
28
|
-
## assume (default) ISO 8859-15 (an updated version of ISO 8859-1) for now
|
29
|
-
##
|
30
|
-
## other possible alternatives - try:
|
31
|
-
## - Windows CP 1252 or
|
32
|
-
## - ISO 8859-2 (for eastern european languages )
|
33
|
-
##
|
34
|
-
## note: german umlaut use the same code (int)
|
35
|
-
## in ISO 8859-1/15 and 2 and Windows CP 1252 (other chars ARE different!!!)
|
36
|
-
|
37
|
-
html = html.force_encoding( Encoding::ISO_8859_15 )
|
38
|
-
html = html.encode( Encoding::UTF_8 ) # try conversion to utf-8
|
39
|
-
|
40
|
-
## check for html entities
|
41
|
-
html = html.gsub( "ä", 'ä' )
|
42
|
-
html = html.gsub( "ö", 'ö' )
|
43
|
-
html = html.gsub( "ü", 'ü' )
|
44
|
-
html = html.gsub( "Ä", 'Ä' )
|
45
|
-
html = html.gsub( "Ö", 'Ö' )
|
46
|
-
html = html.gsub( "Ü", 'Ü' )
|
47
|
-
html = html.gsub( "ß", 'ß' )
|
48
|
-
|
49
|
-
html = html.gsub( "&oulm;", 'ö' ) ## support typo in entity (ö)
|
50
|
-
html = html.gsub( "&slig;", "ß" ) ## support typo in entity (ß)
|
51
|
-
|
52
|
-
html = html.gsub( "É", 'É' )
|
53
|
-
html = html.gsub( "ø", 'ø' )
|
54
|
-
|
55
|
-
## check for more entities
|
56
|
-
html = html.gsub( /&[^;]+;/) do |match|
|
57
|
-
puts "*** found unencoded html entity #{match}"
|
58
|
-
match ## pass through as is (1:1)
|
59
|
-
end
|
60
|
-
## todo/fix: add more entities
|
61
|
-
|
62
|
-
|
63
|
-
txt = html_to_txt( html )
|
64
|
-
|
65
|
-
header = <<EOS
|
66
|
-
<!--
|
67
|
-
source: #{src_url}
|
68
|
-
-->
|
69
|
-
|
70
|
-
EOS
|
71
|
-
|
72
|
-
header+txt ## return txt w/ header
|
73
|
-
end ## method fetch
|
74
|
-
|
75
|
-
end ## class PageFetcher
|
76
|
-
end ## module Rsssf
|
77
|
-
|
78
|
-
## add (shortcut) alias
|
79
|
-
RsssfPageFetcher = Rsssf::PageFetcher
|
80
|
-
|