sportdb-formats 1.1.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +37 -1
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +102 -12
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,203 +1,203 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
module Import
|
6
|
-
|
7
|
-
|
8
|
-
class ClubHistoryReader
|
9
|
-
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
15
|
-
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
16
|
-
parse( txt )
|
17
|
-
end
|
18
|
-
|
19
|
-
def self.parse( txt )
|
20
|
-
new( txt ).parse
|
21
|
-
end
|
22
|
-
|
23
|
-
def initialize( txt )
|
24
|
-
@txt = txt
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
###
|
29
|
-
## RENAME/RENAMED
|
30
|
-
## MOVE/MOVED
|
31
|
-
## BANKRUPT/BANKRUPTED
|
32
|
-
## REFORM/REFORMED
|
33
|
-
## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
|
34
|
-
|
35
|
-
|
36
|
-
KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
|
37
|
-
MOVED?|
|
38
|
-
BANKRUPT(?:ED)?|
|
39
|
-
REFORM(?:ED)?|
|
40
|
-
MERGED?
|
41
|
-
)
|
42
|
-
[ ]+
|
43
|
-
(?<text>.*) # rest of text
|
44
|
-
$
|
45
|
-
}x
|
46
|
-
|
47
|
-
|
48
|
-
def parse
|
49
|
-
recs = []
|
50
|
-
last_rec = nil
|
51
|
-
|
52
|
-
last_country = nil
|
53
|
-
last_season = nil
|
54
|
-
last_keyword = nil
|
55
|
-
last_teams = []
|
56
|
-
|
57
|
-
OutlineReader.parse( @txt ).each do |node|
|
58
|
-
if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
|
59
|
-
heading_level = node[0][1].to_i
|
60
|
-
heading = node[1]
|
61
|
-
|
62
|
-
puts "heading #{heading_level} >#{heading}<"
|
63
|
-
|
64
|
-
|
65
|
-
if heading_level == 1
|
66
|
-
## assume country in heading; allow all "formats" supported by parse e.g.
|
67
|
-
## Österreich • Austria (at)
|
68
|
-
## Österreich • Austria
|
69
|
-
## Austria
|
70
|
-
## Deutschland (de) • Germany
|
71
|
-
country =
|
72
|
-
## check country code - MUST exist for now!!!!
|
73
|
-
if country.nil?
|
74
|
-
puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
75
|
-
exit 1
|
76
|
-
end
|
77
|
-
puts " country >#{heading}< => #{country.name}, #{country.key}"
|
78
|
-
last_country = country
|
79
|
-
last_season = nil ## reset "lower levels" - season & keyword
|
80
|
-
last_keyword = nil
|
81
|
-
elsif heading_level == 2
|
82
|
-
## assume season
|
83
|
-
season = Season.parse( heading )
|
84
|
-
puts " season >#{heading}< => #{season.key}"
|
85
|
-
last_season = season ## reset "lowwer levels" - keyword
|
86
|
-
last_keyword = nil
|
87
|
-
else
|
88
|
-
puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
|
89
|
-
exit 1
|
90
|
-
end
|
91
|
-
|
92
|
-
elsif node[0] == :p ## paragraph with (text) lines
|
93
|
-
if last_country.nil?
|
94
|
-
puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
|
95
|
-
exit 1
|
96
|
-
end
|
97
|
-
if last_season.nil?
|
98
|
-
puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
|
99
|
-
exit 1
|
100
|
-
end
|
101
|
-
|
102
|
-
lines = node[1]
|
103
|
-
lines.each do |line|
|
104
|
-
if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
|
105
|
-
keyword = m[:keyword]
|
106
|
-
line = m[:text].strip
|
107
|
-
|
108
|
-
puts " keyword #{keyword}"
|
109
|
-
last_keyword = case keyword ## "normalize" keywords
|
110
|
-
when 'BANKRUPT', 'BANKRUPTED'
|
111
|
-
'BANKRUPT'
|
112
|
-
when 'RENAME', 'RENAMED'
|
113
|
-
'RENAME'
|
114
|
-
when 'REFORM', 'REFORMED'
|
115
|
-
'REFORM'
|
116
|
-
when 'MOVE', 'MOVED'
|
117
|
-
'MOVE'
|
118
|
-
when 'MERGE', 'MERGED'
|
119
|
-
'MERGE'
|
120
|
-
else
|
121
|
-
puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
|
122
|
-
exit 1
|
123
|
-
end
|
124
|
-
|
125
|
-
last_teams = []
|
126
|
-
end
|
127
|
-
|
128
|
-
if last_keyword.nil?
|
129
|
-
puts "!!! ERROR [club history reader] - line with keyword expected - got:"
|
130
|
-
puts line
|
131
|
-
exit 1
|
132
|
-
end
|
133
|
-
|
134
|
-
if last_keyword == 'BANKRUPT'
|
135
|
-
## requires / expects one team in one line
|
136
|
-
recs << [ last_keyword, last_season.key,
|
137
|
-
[ squish(line), last_country.key ]
|
138
|
-
]
|
139
|
-
elsif last_keyword == 'RENAME' ||
|
140
|
-
last_keyword == 'REFORM' ||
|
141
|
-
last_keyword == 'MOVE'
|
142
|
-
## requires / expects two teams in one line (separated by ⇒ or such)
|
143
|
-
teams = line.split( '⇒' )
|
144
|
-
if teams.size != 2
|
145
|
-
puts "!!! ERROR [club history reader] - expected two teams - got:"
|
146
|
-
pp teams
|
147
|
-
exit 1
|
148
|
-
end
|
149
|
-
teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
|
150
|
-
recs << [ last_keyword, last_season.key,
|
151
|
-
[ teams[0], last_country.key ],
|
152
|
-
[ teams[1], last_country.key ]
|
153
|
-
]
|
154
|
-
elsif last_keyword == 'MERGE'
|
155
|
-
## check if line starts with separator
|
156
|
-
## otherwise collect to be merged teams
|
157
|
-
if line.start_with?( '⇒' )
|
158
|
-
if last_teams.size < 2
|
159
|
-
puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
|
160
|
-
pp last_teams
|
161
|
-
exit 1
|
162
|
-
end
|
163
|
-
## auto-add country to all teams
|
164
|
-
teams = last_teams.map {|team| [team, last_country.key]}
|
165
|
-
recs << [ last_keyword, last_season.key,
|
166
|
-
teams,
|
167
|
-
[ squish(line.sub('⇒','').strip), last_country.key ]
|
168
|
-
]
|
169
|
-
|
170
|
-
last_teams = []
|
171
|
-
else
|
172
|
-
last_teams << squish(line)
|
173
|
-
end
|
174
|
-
else
|
175
|
-
puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
|
176
|
-
exit 1
|
177
|
-
end
|
178
|
-
end # each line (in paragraph)
|
179
|
-
else
|
180
|
-
puts "** !!! ERROR [club history reader] - unknown line type:"
|
181
|
-
pp node
|
182
|
-
exit 1
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
recs
|
187
|
-
end # method read
|
188
|
-
|
189
|
-
|
190
|
-
###############
|
191
|
-
## helper
|
192
|
-
|
193
|
-
def squish( str )
|
194
|
-
## colapse all whitespace to one
|
195
|
-
str.gsub( /[ ]+/,' ' )
|
196
|
-
end
|
197
|
-
|
198
|
-
|
199
|
-
end # class ClubHistoryReader
|
200
|
-
|
201
|
-
|
202
|
-
end ## module Import
|
203
|
-
end ## module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Import
|
6
|
+
|
7
|
+
|
8
|
+
class ClubHistoryReader
|
9
|
+
|
10
|
+
def world() Import.world; end
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
15
|
+
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
16
|
+
parse( txt )
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.parse( txt )
|
20
|
+
new( txt ).parse
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize( txt )
|
24
|
+
@txt = txt
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
###
|
29
|
+
## RENAME/RENAMED
|
30
|
+
## MOVE/MOVED
|
31
|
+
## BANKRUPT/BANKRUPTED
|
32
|
+
## REFORM/REFORMED
|
33
|
+
## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
|
34
|
+
|
35
|
+
|
36
|
+
KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
|
37
|
+
MOVED?|
|
38
|
+
BANKRUPT(?:ED)?|
|
39
|
+
REFORM(?:ED)?|
|
40
|
+
MERGED?
|
41
|
+
)
|
42
|
+
[ ]+
|
43
|
+
(?<text>.*) # rest of text
|
44
|
+
$
|
45
|
+
}x
|
46
|
+
|
47
|
+
|
48
|
+
def parse
|
49
|
+
recs = []
|
50
|
+
last_rec = nil
|
51
|
+
|
52
|
+
last_country = nil
|
53
|
+
last_season = nil
|
54
|
+
last_keyword = nil
|
55
|
+
last_teams = []
|
56
|
+
|
57
|
+
OutlineReader.parse( @txt ).each do |node|
|
58
|
+
if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
|
59
|
+
heading_level = node[0][1].to_i
|
60
|
+
heading = node[1]
|
61
|
+
|
62
|
+
puts "heading #{heading_level} >#{heading}<"
|
63
|
+
|
64
|
+
|
65
|
+
if heading_level == 1
|
66
|
+
## assume country in heading; allow all "formats" supported by parse e.g.
|
67
|
+
## Österreich • Austria (at)
|
68
|
+
## Österreich • Austria
|
69
|
+
## Austria
|
70
|
+
## Deutschland (de) • Germany
|
71
|
+
country = world.countries.parse( heading )
|
72
|
+
## check country code - MUST exist for now!!!!
|
73
|
+
if country.nil?
|
74
|
+
puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
75
|
+
exit 1
|
76
|
+
end
|
77
|
+
puts " country >#{heading}< => #{country.name}, #{country.key}"
|
78
|
+
last_country = country
|
79
|
+
last_season = nil ## reset "lower levels" - season & keyword
|
80
|
+
last_keyword = nil
|
81
|
+
elsif heading_level == 2
|
82
|
+
## assume season
|
83
|
+
season = Season.parse( heading )
|
84
|
+
puts " season >#{heading}< => #{season.key}"
|
85
|
+
last_season = season ## reset "lowwer levels" - keyword
|
86
|
+
last_keyword = nil
|
87
|
+
else
|
88
|
+
puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
|
89
|
+
exit 1
|
90
|
+
end
|
91
|
+
|
92
|
+
elsif node[0] == :p ## paragraph with (text) lines
|
93
|
+
if last_country.nil?
|
94
|
+
puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
|
95
|
+
exit 1
|
96
|
+
end
|
97
|
+
if last_season.nil?
|
98
|
+
puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
|
99
|
+
exit 1
|
100
|
+
end
|
101
|
+
|
102
|
+
lines = node[1]
|
103
|
+
lines.each do |line|
|
104
|
+
if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
|
105
|
+
keyword = m[:keyword]
|
106
|
+
line = m[:text].strip
|
107
|
+
|
108
|
+
puts " keyword #{keyword}"
|
109
|
+
last_keyword = case keyword ## "normalize" keywords
|
110
|
+
when 'BANKRUPT', 'BANKRUPTED'
|
111
|
+
'BANKRUPT'
|
112
|
+
when 'RENAME', 'RENAMED'
|
113
|
+
'RENAME'
|
114
|
+
when 'REFORM', 'REFORMED'
|
115
|
+
'REFORM'
|
116
|
+
when 'MOVE', 'MOVED'
|
117
|
+
'MOVE'
|
118
|
+
when 'MERGE', 'MERGED'
|
119
|
+
'MERGE'
|
120
|
+
else
|
121
|
+
puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
|
122
|
+
exit 1
|
123
|
+
end
|
124
|
+
|
125
|
+
last_teams = []
|
126
|
+
end
|
127
|
+
|
128
|
+
if last_keyword.nil?
|
129
|
+
puts "!!! ERROR [club history reader] - line with keyword expected - got:"
|
130
|
+
puts line
|
131
|
+
exit 1
|
132
|
+
end
|
133
|
+
|
134
|
+
if last_keyword == 'BANKRUPT'
|
135
|
+
## requires / expects one team in one line
|
136
|
+
recs << [ last_keyword, last_season.key,
|
137
|
+
[ squish(line), last_country.key ]
|
138
|
+
]
|
139
|
+
elsif last_keyword == 'RENAME' ||
|
140
|
+
last_keyword == 'REFORM' ||
|
141
|
+
last_keyword == 'MOVE'
|
142
|
+
## requires / expects two teams in one line (separated by ⇒ or such)
|
143
|
+
teams = line.split( '⇒' )
|
144
|
+
if teams.size != 2
|
145
|
+
puts "!!! ERROR [club history reader] - expected two teams - got:"
|
146
|
+
pp teams
|
147
|
+
exit 1
|
148
|
+
end
|
149
|
+
teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
|
150
|
+
recs << [ last_keyword, last_season.key,
|
151
|
+
[ teams[0], last_country.key ],
|
152
|
+
[ teams[1], last_country.key ]
|
153
|
+
]
|
154
|
+
elsif last_keyword == 'MERGE'
|
155
|
+
## check if line starts with separator
|
156
|
+
## otherwise collect to be merged teams
|
157
|
+
if line.start_with?( '⇒' )
|
158
|
+
if last_teams.size < 2
|
159
|
+
puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
|
160
|
+
pp last_teams
|
161
|
+
exit 1
|
162
|
+
end
|
163
|
+
## auto-add country to all teams
|
164
|
+
teams = last_teams.map {|team| [team, last_country.key]}
|
165
|
+
recs << [ last_keyword, last_season.key,
|
166
|
+
teams,
|
167
|
+
[ squish(line.sub('⇒','').strip), last_country.key ]
|
168
|
+
]
|
169
|
+
|
170
|
+
last_teams = []
|
171
|
+
else
|
172
|
+
last_teams << squish(line)
|
173
|
+
end
|
174
|
+
else
|
175
|
+
puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
|
176
|
+
exit 1
|
177
|
+
end
|
178
|
+
end # each line (in paragraph)
|
179
|
+
else
|
180
|
+
puts "** !!! ERROR [club history reader] - unknown line type:"
|
181
|
+
pp node
|
182
|
+
exit 1
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
recs
|
187
|
+
end # method read
|
188
|
+
|
189
|
+
|
190
|
+
###############
|
191
|
+
## helper
|
192
|
+
|
193
|
+
def squish( str )
|
194
|
+
## colapse all whitespace to one
|
195
|
+
str.gsub( /[ ]+/,' ' )
|
196
|
+
end
|
197
|
+
|
198
|
+
|
199
|
+
end # class ClubHistoryReader
|
200
|
+
|
201
|
+
|
202
|
+
end ## module Import
|
203
|
+
end ## module SportDb
|
@@ -1,108 +1,108 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
module Import
|
6
|
-
|
7
|
-
|
8
|
-
class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
|
9
|
-
|
10
|
-
class WikiClub # nested class
|
11
|
-
attr_reader :name, :country
|
12
|
-
def initialize( name, country )
|
13
|
-
@name, @country = name, country
|
14
|
-
end
|
15
|
-
end # (nested) class WikiClub
|
16
|
-
|
17
|
-
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
22
|
-
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
23
|
-
parse( txt )
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.parse( txt )
|
27
|
-
new( txt ).parse
|
28
|
-
end
|
29
|
-
|
30
|
-
def initialize( txt )
|
31
|
-
@txt = txt
|
32
|
-
end
|
33
|
-
|
34
|
-
def parse
|
35
|
-
recs = []
|
36
|
-
last_country = nil ## note: supports only one level of headings for now (and that is a country)
|
37
|
-
|
38
|
-
@txt.each_line do |line|
|
39
|
-
line = line.strip
|
40
|
-
|
41
|
-
next if line.empty?
|
42
|
-
next if line.start_with?( '#' ) ## skip comments too
|
43
|
-
|
44
|
-
## strip inline (until end-of-line) comments too
|
45
|
-
## e.g Eupen => KAS Eupen, ## [de]
|
46
|
-
## => Eupen => KAS Eupen,
|
47
|
-
line = line.sub( /#.*/, '' ).strip
|
48
|
-
pp line
|
49
|
-
|
50
|
-
|
51
|
-
next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
|
52
|
-
|
53
|
-
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
54
|
-
## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
|
55
|
-
if line =~ /^(={1,}) ## leading ======
|
56
|
-
([^=]+?) ## text (note: for now no "inline" = allowed)
|
57
|
-
=* ## (optional) trailing ====
|
58
|
-
$/x
|
59
|
-
heading_marker = $1
|
60
|
-
heading_level = $1.length ## count number of = for heading level
|
61
|
-
heading = $2.strip
|
62
|
-
|
63
|
-
puts "heading #{heading_level} >#{heading}<"
|
64
|
-
|
65
|
-
if heading_level > 1
|
66
|
-
puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
|
67
|
-
exit 1
|
68
|
-
end
|
69
|
-
|
70
|
-
## assume country in heading; allow all "formats" supported by parse e.g.
|
71
|
-
## Österreich • Austria (at)
|
72
|
-
## Österreich • Austria
|
73
|
-
## Austria
|
74
|
-
## Deutschland (de) • Germany
|
75
|
-
country =
|
76
|
-
## check country code - MUST exist for now!!!!
|
77
|
-
if country.nil?
|
78
|
-
puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
79
|
-
exit 1
|
80
|
-
end
|
81
|
-
|
82
|
-
last_country = country
|
83
|
-
pp last_country
|
84
|
-
else
|
85
|
-
## strip and squish (white)spaces
|
86
|
-
# e.g. New York FC (2011-) => New York FC (2011-)
|
87
|
-
value = line.strip.gsub( /[ \t]+/, ' ' )
|
88
|
-
|
89
|
-
## normalize (allow underscore (-) - replace with space)
|
90
|
-
## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
|
91
|
-
value = value.gsub( '_', ' ' )
|
92
|
-
|
93
|
-
if last_country.nil?
|
94
|
-
puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
|
95
|
-
exit 1
|
96
|
-
end
|
97
|
-
|
98
|
-
rec = WikiClub.new( value, last_country )
|
99
|
-
recs << rec
|
100
|
-
end
|
101
|
-
end # each_line
|
102
|
-
recs
|
103
|
-
end # method read
|
104
|
-
|
105
|
-
end # class WikiReader
|
106
|
-
|
107
|
-
end ## module Import
|
108
|
-
end ## module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Import
|
6
|
+
|
7
|
+
|
8
|
+
class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
|
9
|
+
|
10
|
+
class WikiClub # nested class
|
11
|
+
attr_reader :name, :country
|
12
|
+
def initialize( name, country )
|
13
|
+
@name, @country = name, country
|
14
|
+
end
|
15
|
+
end # (nested) class WikiClub
|
16
|
+
|
17
|
+
|
18
|
+
def world() Import.world; end
|
19
|
+
|
20
|
+
|
21
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
22
|
+
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
23
|
+
parse( txt )
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.parse( txt )
|
27
|
+
new( txt ).parse
|
28
|
+
end
|
29
|
+
|
30
|
+
def initialize( txt )
|
31
|
+
@txt = txt
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse
|
35
|
+
recs = []
|
36
|
+
last_country = nil ## note: supports only one level of headings for now (and that is a country)
|
37
|
+
|
38
|
+
@txt.each_line do |line|
|
39
|
+
line = line.strip
|
40
|
+
|
41
|
+
next if line.empty?
|
42
|
+
next if line.start_with?( '#' ) ## skip comments too
|
43
|
+
|
44
|
+
## strip inline (until end-of-line) comments too
|
45
|
+
## e.g Eupen => KAS Eupen, ## [de]
|
46
|
+
## => Eupen => KAS Eupen,
|
47
|
+
line = line.sub( /#.*/, '' ).strip
|
48
|
+
pp line
|
49
|
+
|
50
|
+
|
51
|
+
next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
|
52
|
+
|
53
|
+
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
54
|
+
## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
|
55
|
+
if line =~ /^(={1,}) ## leading ======
|
56
|
+
([^=]+?) ## text (note: for now no "inline" = allowed)
|
57
|
+
=* ## (optional) trailing ====
|
58
|
+
$/x
|
59
|
+
heading_marker = $1
|
60
|
+
heading_level = $1.length ## count number of = for heading level
|
61
|
+
heading = $2.strip
|
62
|
+
|
63
|
+
puts "heading #{heading_level} >#{heading}<"
|
64
|
+
|
65
|
+
if heading_level > 1
|
66
|
+
puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
|
67
|
+
exit 1
|
68
|
+
end
|
69
|
+
|
70
|
+
## assume country in heading; allow all "formats" supported by parse e.g.
|
71
|
+
## Österreich • Austria (at)
|
72
|
+
## Österreich • Austria
|
73
|
+
## Austria
|
74
|
+
## Deutschland (de) • Germany
|
75
|
+
country = world.countries.parse( heading )
|
76
|
+
## check country code - MUST exist for now!!!!
|
77
|
+
if country.nil?
|
78
|
+
puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
79
|
+
exit 1
|
80
|
+
end
|
81
|
+
|
82
|
+
last_country = country
|
83
|
+
pp last_country
|
84
|
+
else
|
85
|
+
## strip and squish (white)spaces
|
86
|
+
# e.g. New York FC (2011-) => New York FC (2011-)
|
87
|
+
value = line.strip.gsub( /[ \t]+/, ' ' )
|
88
|
+
|
89
|
+
## normalize (allow underscore (-) - replace with space)
|
90
|
+
## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
|
91
|
+
value = value.gsub( '_', ' ' )
|
92
|
+
|
93
|
+
if last_country.nil?
|
94
|
+
puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
|
95
|
+
exit 1
|
96
|
+
end
|
97
|
+
|
98
|
+
rec = WikiClub.new( value, last_country )
|
99
|
+
recs << rec
|
100
|
+
end
|
101
|
+
end # each_line
|
102
|
+
recs
|
103
|
+
end # method read
|
104
|
+
|
105
|
+
end # class WikiReader
|
106
|
+
|
107
|
+
end ## module Import
|
108
|
+
end ## module SportDb
|
@@ -1,13 +1,10 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
1
|
module SportDb
|
5
|
-
|
2
|
+
module Module
|
6
3
|
module Formats
|
7
4
|
|
8
5
|
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
9
|
-
MINOR = 1
|
10
|
-
PATCH = 6
|
6
|
+
MINOR = 2
|
7
|
+
PATCH = 0
|
11
8
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
12
9
|
|
13
10
|
def self.version
|
@@ -15,7 +12,7 @@ module Formats
|
|
15
12
|
end
|
16
13
|
|
17
14
|
def self.banner
|
18
|
-
"sportdb-formats/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
15
|
+
"sportdb-formats/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
|
19
16
|
end
|
20
17
|
|
21
18
|
def self.root
|