sportdb-formats 1.1.6 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +37 -1
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +102 -12
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,203 +1,203 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
module Import
|
6
|
-
|
7
|
-
|
8
|
-
class ClubHistoryReader
|
9
|
-
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
15
|
-
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
16
|
-
parse( txt )
|
17
|
-
end
|
18
|
-
|
19
|
-
def self.parse( txt )
|
20
|
-
new( txt ).parse
|
21
|
-
end
|
22
|
-
|
23
|
-
def initialize( txt )
|
24
|
-
@txt = txt
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
###
|
29
|
-
## RENAME/RENAMED
|
30
|
-
## MOVE/MOVED
|
31
|
-
## BANKRUPT/BANKRUPTED
|
32
|
-
## REFORM/REFORMED
|
33
|
-
## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
|
34
|
-
|
35
|
-
|
36
|
-
KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
|
37
|
-
MOVED?|
|
38
|
-
BANKRUPT(?:ED)?|
|
39
|
-
REFORM(?:ED)?|
|
40
|
-
MERGED?
|
41
|
-
)
|
42
|
-
[ ]+
|
43
|
-
(?<text>.*) # rest of text
|
44
|
-
$
|
45
|
-
}x
|
46
|
-
|
47
|
-
|
48
|
-
def parse
|
49
|
-
recs = []
|
50
|
-
last_rec = nil
|
51
|
-
|
52
|
-
last_country = nil
|
53
|
-
last_season = nil
|
54
|
-
last_keyword = nil
|
55
|
-
last_teams = []
|
56
|
-
|
57
|
-
OutlineReader.parse( @txt ).each do |node|
|
58
|
-
if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
|
59
|
-
heading_level = node[0][1].to_i
|
60
|
-
heading = node[1]
|
61
|
-
|
62
|
-
puts "heading #{heading_level} >#{heading}<"
|
63
|
-
|
64
|
-
|
65
|
-
if heading_level == 1
|
66
|
-
## assume country in heading; allow all "formats" supported by parse e.g.
|
67
|
-
## Österreich • Austria (at)
|
68
|
-
## Österreich • Austria
|
69
|
-
## Austria
|
70
|
-
## Deutschland (de) • Germany
|
71
|
-
country =
|
72
|
-
## check country code - MUST exist for now!!!!
|
73
|
-
if country.nil?
|
74
|
-
puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
75
|
-
exit 1
|
76
|
-
end
|
77
|
-
puts " country >#{heading}< => #{country.name}, #{country.key}"
|
78
|
-
last_country = country
|
79
|
-
last_season = nil ## reset "lower levels" - season & keyword
|
80
|
-
last_keyword = nil
|
81
|
-
elsif heading_level == 2
|
82
|
-
## assume season
|
83
|
-
season = Season.parse( heading )
|
84
|
-
puts " season >#{heading}< => #{season.key}"
|
85
|
-
last_season = season ## reset "lower levels" - keyword
|
86
|
-
last_keyword = nil
|
87
|
-
else
|
88
|
-
puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
|
89
|
-
exit 1
|
90
|
-
end
|
91
|
-
|
92
|
-
elsif node[0] == :p ## paragraph with (text) lines
|
93
|
-
if last_country.nil?
|
94
|
-
puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
|
95
|
-
exit 1
|
96
|
-
end
|
97
|
-
if last_season.nil?
|
98
|
-
puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
|
99
|
-
exit 1
|
100
|
-
end
|
101
|
-
|
102
|
-
lines = node[1]
|
103
|
-
lines.each do |line|
|
104
|
-
if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
|
105
|
-
keyword = m[:keyword]
|
106
|
-
line = m[:text].strip
|
107
|
-
|
108
|
-
puts " keyword #{keyword}"
|
109
|
-
last_keyword = case keyword ## "normalize" keywords
|
110
|
-
when 'BANKRUPT', 'BANKRUPTED'
|
111
|
-
'BANKRUPT'
|
112
|
-
when 'RENAME', 'RENAMED'
|
113
|
-
'RENAME'
|
114
|
-
when 'REFORM', 'REFORMED'
|
115
|
-
'REFORM'
|
116
|
-
when 'MOVE', 'MOVED'
|
117
|
-
'MOVE'
|
118
|
-
when 'MERGE', 'MERGED'
|
119
|
-
'MERGE'
|
120
|
-
else
|
121
|
-
puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
|
122
|
-
exit 1
|
123
|
-
end
|
124
|
-
|
125
|
-
last_teams = []
|
126
|
-
end
|
127
|
-
|
128
|
-
if last_keyword.nil?
|
129
|
-
puts "!!! ERROR [club history reader] - line with keyword expected - got:"
|
130
|
-
puts line
|
131
|
-
exit 1
|
132
|
-
end
|
133
|
-
|
134
|
-
if last_keyword == 'BANKRUPT'
|
135
|
-
## requires / expects one team in one line
|
136
|
-
recs << [ last_keyword, last_season.key,
|
137
|
-
[ squish(line), last_country.key ]
|
138
|
-
]
|
139
|
-
elsif last_keyword == 'RENAME' ||
|
140
|
-
last_keyword == 'REFORM' ||
|
141
|
-
last_keyword == 'MOVE'
|
142
|
-
## requires / expects two teams in one line (separated by ⇒ or such)
|
143
|
-
teams = line.split( '⇒' )
|
144
|
-
if teams.size != 2
|
145
|
-
puts "!!! ERROR [club history reader] - expected two teams - got:"
|
146
|
-
pp teams
|
147
|
-
exit 1
|
148
|
-
end
|
149
|
-
teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
|
150
|
-
recs << [ last_keyword, last_season.key,
|
151
|
-
[ teams[0], last_country.key ],
|
152
|
-
[ teams[1], last_country.key ]
|
153
|
-
]
|
154
|
-
elsif last_keyword == 'MERGE'
|
155
|
-
## check if line starts with separator
|
156
|
-
## otherwise collect to be merged teams
|
157
|
-
if line.start_with?( '⇒' )
|
158
|
-
if last_teams.size < 2
|
159
|
-
puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
|
160
|
-
pp last_teams
|
161
|
-
exit 1
|
162
|
-
end
|
163
|
-
## auto-add country to all teams
|
164
|
-
teams = last_teams.map {|team| [team, last_country.key]}
|
165
|
-
recs << [ last_keyword, last_season.key,
|
166
|
-
teams,
|
167
|
-
[ squish(line.sub('⇒','').strip), last_country.key ]
|
168
|
-
]
|
169
|
-
|
170
|
-
last_teams = []
|
171
|
-
else
|
172
|
-
last_teams << squish(line)
|
173
|
-
end
|
174
|
-
else
|
175
|
-
puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
|
176
|
-
exit 1
|
177
|
-
end
|
178
|
-
end # each line (in paragraph)
|
179
|
-
else
|
180
|
-
puts "** !!! ERROR [club history reader] - unknown line type:"
|
181
|
-
pp node
|
182
|
-
exit 1
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
recs
|
187
|
-
end # method parse
|
188
|
-
|
189
|
-
|
190
|
-
###############
|
191
|
-
## helper
|
192
|
-
|
193
|
-
def squish( str )
|
194
|
-
## collapse each run of spaces into a single space
|
195
|
-
str.gsub( /[ ]+/,' ' )
|
196
|
-
end
|
197
|
-
|
198
|
-
|
199
|
-
end # class ClubHistoryReader
|
200
|
-
|
201
|
-
|
202
|
-
end ## module Import
|
203
|
-
end ## module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Import
|
6
|
+
|
7
|
+
|
8
|
+
class ClubHistoryReader
|
9
|
+
|
10
|
+
def world() Import.world; end
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
15
|
+
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
16
|
+
parse( txt )
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.parse( txt )
|
20
|
+
new( txt ).parse
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize( txt )
|
24
|
+
@txt = txt
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
###
|
29
|
+
## RENAME/RENAMED
|
30
|
+
## MOVE/MOVED
|
31
|
+
## BANKRUPT/BANKRUPTED
|
32
|
+
## REFORM/REFORMED
|
33
|
+
## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
|
34
|
+
|
35
|
+
|
36
|
+
KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
|
37
|
+
MOVED?|
|
38
|
+
BANKRUPT(?:ED)?|
|
39
|
+
REFORM(?:ED)?|
|
40
|
+
MERGED?
|
41
|
+
)
|
42
|
+
[ ]+
|
43
|
+
(?<text>.*) # rest of text
|
44
|
+
$
|
45
|
+
}x
|
46
|
+
|
47
|
+
|
48
|
+
def parse
|
49
|
+
recs = []
|
50
|
+
last_rec = nil
|
51
|
+
|
52
|
+
last_country = nil
|
53
|
+
last_season = nil
|
54
|
+
last_keyword = nil
|
55
|
+
last_teams = []
|
56
|
+
|
57
|
+
OutlineReader.parse( @txt ).each do |node|
|
58
|
+
if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
|
59
|
+
heading_level = node[0][1].to_i
|
60
|
+
heading = node[1]
|
61
|
+
|
62
|
+
puts "heading #{heading_level} >#{heading}<"
|
63
|
+
|
64
|
+
|
65
|
+
if heading_level == 1
|
66
|
+
## assume country in heading; allow all "formats" supported by parse e.g.
|
67
|
+
## Österreich • Austria (at)
|
68
|
+
## Österreich • Austria
|
69
|
+
## Austria
|
70
|
+
## Deutschland (de) • Germany
|
71
|
+
country = world.countries.parse( heading )
|
72
|
+
## check country code - MUST exist for now!!!!
|
73
|
+
if country.nil?
|
74
|
+
puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
75
|
+
exit 1
|
76
|
+
end
|
77
|
+
puts " country >#{heading}< => #{country.name}, #{country.key}"
|
78
|
+
last_country = country
|
79
|
+
last_season = nil ## reset "lower levels" - season & keyword
|
80
|
+
last_keyword = nil
|
81
|
+
elsif heading_level == 2
|
82
|
+
## assume season
|
83
|
+
season = Season.parse( heading )
|
84
|
+
puts " season >#{heading}< => #{season.key}"
|
85
|
+
last_season = season ## reset "lower levels" - keyword
|
86
|
+
last_keyword = nil
|
87
|
+
else
|
88
|
+
puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
|
89
|
+
exit 1
|
90
|
+
end
|
91
|
+
|
92
|
+
elsif node[0] == :p ## paragraph with (text) lines
|
93
|
+
if last_country.nil?
|
94
|
+
puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
|
95
|
+
exit 1
|
96
|
+
end
|
97
|
+
if last_season.nil?
|
98
|
+
puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
|
99
|
+
exit 1
|
100
|
+
end
|
101
|
+
|
102
|
+
lines = node[1]
|
103
|
+
lines.each do |line|
|
104
|
+
if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
|
105
|
+
keyword = m[:keyword]
|
106
|
+
line = m[:text].strip
|
107
|
+
|
108
|
+
puts " keyword #{keyword}"
|
109
|
+
last_keyword = case keyword ## "normalize" keywords
|
110
|
+
when 'BANKRUPT', 'BANKRUPTED'
|
111
|
+
'BANKRUPT'
|
112
|
+
when 'RENAME', 'RENAMED'
|
113
|
+
'RENAME'
|
114
|
+
when 'REFORM', 'REFORMED'
|
115
|
+
'REFORM'
|
116
|
+
when 'MOVE', 'MOVED'
|
117
|
+
'MOVE'
|
118
|
+
when 'MERGE', 'MERGED'
|
119
|
+
'MERGE'
|
120
|
+
else
|
121
|
+
puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
|
122
|
+
exit 1
|
123
|
+
end
|
124
|
+
|
125
|
+
last_teams = []
|
126
|
+
end
|
127
|
+
|
128
|
+
if last_keyword.nil?
|
129
|
+
puts "!!! ERROR [club history reader] - line with keyword expected - got:"
|
130
|
+
puts line
|
131
|
+
exit 1
|
132
|
+
end
|
133
|
+
|
134
|
+
if last_keyword == 'BANKRUPT'
|
135
|
+
## requires / expects one team in one line
|
136
|
+
recs << [ last_keyword, last_season.key,
|
137
|
+
[ squish(line), last_country.key ]
|
138
|
+
]
|
139
|
+
elsif last_keyword == 'RENAME' ||
|
140
|
+
last_keyword == 'REFORM' ||
|
141
|
+
last_keyword == 'MOVE'
|
142
|
+
## requires / expects two teams in one line (separated by ⇒ or such)
|
143
|
+
teams = line.split( '⇒' )
|
144
|
+
if teams.size != 2
|
145
|
+
puts "!!! ERROR [club history reader] - expected two teams - got:"
|
146
|
+
pp teams
|
147
|
+
exit 1
|
148
|
+
end
|
149
|
+
teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
|
150
|
+
recs << [ last_keyword, last_season.key,
|
151
|
+
[ teams[0], last_country.key ],
|
152
|
+
[ teams[1], last_country.key ]
|
153
|
+
]
|
154
|
+
elsif last_keyword == 'MERGE'
|
155
|
+
## check if line starts with separator
|
156
|
+
## otherwise collect to be merged teams
|
157
|
+
if line.start_with?( '⇒' )
|
158
|
+
if last_teams.size < 2
|
159
|
+
puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
|
160
|
+
pp last_teams
|
161
|
+
exit 1
|
162
|
+
end
|
163
|
+
## auto-add country to all teams
|
164
|
+
teams = last_teams.map {|team| [team, last_country.key]}
|
165
|
+
recs << [ last_keyword, last_season.key,
|
166
|
+
teams,
|
167
|
+
[ squish(line.sub('⇒','').strip), last_country.key ]
|
168
|
+
]
|
169
|
+
|
170
|
+
last_teams = []
|
171
|
+
else
|
172
|
+
last_teams << squish(line)
|
173
|
+
end
|
174
|
+
else
|
175
|
+
puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
|
176
|
+
exit 1
|
177
|
+
end
|
178
|
+
end # each line (in paragraph)
|
179
|
+
else
|
180
|
+
puts "** !!! ERROR [club history reader] - unknown line type:"
|
181
|
+
pp node
|
182
|
+
exit 1
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
recs
|
187
|
+
end # method parse
|
188
|
+
|
189
|
+
|
190
|
+
###############
|
191
|
+
## helper
|
192
|
+
|
193
|
+
def squish( str )
|
194
|
+
## collapse each run of spaces into a single space
|
195
|
+
str.gsub( /[ ]+/,' ' )
|
196
|
+
end
|
197
|
+
|
198
|
+
|
199
|
+
end # class ClubHistoryReader
|
200
|
+
|
201
|
+
|
202
|
+
end ## module Import
|
203
|
+
end ## module SportDb
|
@@ -1,108 +1,108 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
module Import
|
6
|
-
|
7
|
-
|
8
|
-
class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
|
9
|
-
|
10
|
-
class WikiClub # nested class
|
11
|
-
attr_reader :name, :country
|
12
|
-
def initialize( name, country )
|
13
|
-
@name, @country = name, country
|
14
|
-
end
|
15
|
-
end # (nested) class WikiClub
|
16
|
-
|
17
|
-
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
22
|
-
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
23
|
-
parse( txt )
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.parse( txt )
|
27
|
-
new( txt ).parse
|
28
|
-
end
|
29
|
-
|
30
|
-
def initialize( txt )
|
31
|
-
@txt = txt
|
32
|
-
end
|
33
|
-
|
34
|
-
def parse
|
35
|
-
recs = []
|
36
|
-
last_country = nil ## note: supports only one level of headings for now (and that is a country)
|
37
|
-
|
38
|
-
@txt.each_line do |line|
|
39
|
-
line = line.strip
|
40
|
-
|
41
|
-
next if line.empty?
|
42
|
-
next if line.start_with?( '#' ) ## skip comments too
|
43
|
-
|
44
|
-
## strip inline (until end-of-line) comments too
|
45
|
-
## e.g Eupen => KAS Eupen, ## [de]
|
46
|
-
## => Eupen => KAS Eupen,
|
47
|
-
line = line.sub( /#.*/, '' ).strip
|
48
|
-
pp line
|
49
|
-
|
50
|
-
|
51
|
-
next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
|
52
|
-
|
53
|
-
## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
|
54
|
-
## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
|
55
|
-
if line =~ /^(={1,}) ## leading ======
|
56
|
-
([^=]+?) ## text (note: for now no "inline" = allowed)
|
57
|
-
=* ## (optional) trailing ====
|
58
|
-
$/x
|
59
|
-
heading_marker = $1
|
60
|
-
heading_level = $1.length ## count number of = for heading level
|
61
|
-
heading = $2.strip
|
62
|
-
|
63
|
-
puts "heading #{heading_level} >#{heading}<"
|
64
|
-
|
65
|
-
if heading_level > 1
|
66
|
-
puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
|
67
|
-
exit 1
|
68
|
-
end
|
69
|
-
|
70
|
-
## assume country in heading; allow all "formats" supported by parse e.g.
|
71
|
-
## Österreich • Austria (at)
|
72
|
-
## Österreich • Austria
|
73
|
-
## Austria
|
74
|
-
## Deutschland (de) • Germany
|
75
|
-
country =
|
76
|
-
## check country code - MUST exist for now!!!!
|
77
|
-
if country.nil?
|
78
|
-
puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
79
|
-
exit 1
|
80
|
-
end
|
81
|
-
|
82
|
-
last_country = country
|
83
|
-
pp last_country
|
84
|
-
else
|
85
|
-
## strip and squish (white)spaces
|
86
|
-
# e.g. New York FC (2011-) => New York FC (2011-)
|
87
|
-
value = line.strip.gsub( /[ \t]+/, ' ' )
|
88
|
-
|
89
|
-
## normalize (allow underscore (_) - replace with space)
|
90
|
-
## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
|
91
|
-
value = value.gsub( '_', ' ' )
|
92
|
-
|
93
|
-
if last_country.nil?
|
94
|
-
puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
|
95
|
-
exit 1
|
96
|
-
end
|
97
|
-
|
98
|
-
rec = WikiClub.new( value, last_country )
|
99
|
-
recs << rec
|
100
|
-
end
|
101
|
-
end # each_line
|
102
|
-
recs
|
103
|
-
end # method parse
|
104
|
-
|
105
|
-
end # class WikiReader
|
106
|
-
|
107
|
-
end ## module Import
|
108
|
-
end ## module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Import
|
6
|
+
|
7
|
+
|
8
|
+
class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
|
9
|
+
|
10
|
+
class WikiClub # nested class
|
11
|
+
attr_reader :name, :country
|
12
|
+
def initialize( name, country )
|
13
|
+
@name, @country = name, country
|
14
|
+
end
|
15
|
+
end # (nested) class WikiClub
|
16
|
+
|
17
|
+
|
18
|
+
def world() Import.world; end
|
19
|
+
|
20
|
+
|
21
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
22
|
+
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
23
|
+
parse( txt )
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.parse( txt )
|
27
|
+
new( txt ).parse
|
28
|
+
end
|
29
|
+
|
30
|
+
def initialize( txt )
|
31
|
+
@txt = txt
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse
|
35
|
+
recs = []
|
36
|
+
last_country = nil ## note: supports only one level of headings for now (and that is a country)
|
37
|
+
|
38
|
+
@txt.each_line do |line|
|
39
|
+
line = line.strip
|
40
|
+
|
41
|
+
next if line.empty?
|
42
|
+
next if line.start_with?( '#' ) ## skip comments too
|
43
|
+
|
44
|
+
## strip inline (until end-of-line) comments too
|
45
|
+
## e.g Eupen => KAS Eupen, ## [de]
|
46
|
+
## => Eupen => KAS Eupen,
|
47
|
+
line = line.sub( /#.*/, '' ).strip
|
48
|
+
pp line
|
49
|
+
|
50
|
+
|
51
|
+
next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
|
52
|
+
|
53
|
+
## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
|
54
|
+
## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
|
55
|
+
if line =~ /^(={1,}) ## leading ======
|
56
|
+
([^=]+?) ## text (note: for now no "inline" = allowed)
|
57
|
+
=* ## (optional) trailing ====
|
58
|
+
$/x
|
59
|
+
heading_marker = $1
|
60
|
+
heading_level = $1.length ## count number of = for heading level
|
61
|
+
heading = $2.strip
|
62
|
+
|
63
|
+
puts "heading #{heading_level} >#{heading}<"
|
64
|
+
|
65
|
+
if heading_level > 1
|
66
|
+
puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
|
67
|
+
exit 1
|
68
|
+
end
|
69
|
+
|
70
|
+
## assume country in heading; allow all "formats" supported by parse e.g.
|
71
|
+
## Österreich • Austria (at)
|
72
|
+
## Österreich • Austria
|
73
|
+
## Austria
|
74
|
+
## Deutschland (de) • Germany
|
75
|
+
country = world.countries.parse( heading )
|
76
|
+
## check country code - MUST exist for now!!!!
|
77
|
+
if country.nil?
|
78
|
+
puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
79
|
+
exit 1
|
80
|
+
end
|
81
|
+
|
82
|
+
last_country = country
|
83
|
+
pp last_country
|
84
|
+
else
|
85
|
+
## strip and squish (white)spaces
|
86
|
+
# e.g. New York FC (2011-) => New York FC (2011-)
|
87
|
+
value = line.strip.gsub( /[ \t]+/, ' ' )
|
88
|
+
|
89
|
+
## normalize (allow underscore (_) - replace with space)
|
90
|
+
## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
|
91
|
+
value = value.gsub( '_', ' ' )
|
92
|
+
|
93
|
+
if last_country.nil?
|
94
|
+
puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
|
95
|
+
exit 1
|
96
|
+
end
|
97
|
+
|
98
|
+
rec = WikiClub.new( value, last_country )
|
99
|
+
recs << rec
|
100
|
+
end
|
101
|
+
end # each_line
|
102
|
+
recs
|
103
|
+
end # method parse
|
104
|
+
|
105
|
+
end # class WikiReader
|
106
|
+
|
107
|
+
end ## module Import
|
108
|
+
end ## module SportDb
|
@@ -1,13 +1,10 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
1
|
module SportDb
|
5
|
-
|
2
|
+
module Module
|
6
3
|
module Formats
|
7
4
|
|
8
5
|
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
9
|
-
MINOR = 1
|
10
|
-
PATCH = 6
|
6
|
+
MINOR = 2
|
7
|
+
PATCH = 0
|
11
8
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
12
9
|
|
13
10
|
def self.version
|
@@ -15,7 +12,7 @@ module Formats
|
|
15
12
|
end
|
16
13
|
|
17
14
|
def self.banner
|
18
|
-
"sportdb-formats/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
15
|
+
"sportdb-formats/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
|
19
16
|
end
|
20
17
|
|
21
18
|
def self.root
|