sportdb-config 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 064b7f927038794c7d1efecde27bbba3d7ac78fa
4
- data.tar.gz: 0dc83a80b70db77bca25e9921182fc4a1aea0466
3
+ metadata.gz: 5cc9b8eea5116ab13e92c7a5a6b3b7962fa3f1c1
4
+ data.tar.gz: a94f4452d3398743f2e6283cead72ccc552d2ec0
5
5
  SHA512:
6
- metadata.gz: 75d1416b8976f6bd619de196265a7ba1ac3f88c020b28489a4302732272f0e1b686f76a2dcf877e40364e240d148bf0853ba643466ff86d454060e45c9dd4da3
7
- data.tar.gz: 1fdc13cde4020f2bc2a21da04b7df6ce25810f4f282a477862adf2af789b17d6fa009e08163fc163579478aed0ca9210588aafb09f728d2472a9711bdec90c1a
6
+ metadata.gz: 6db0746eda38483b0bfcc241730044805c9332a0f2b27ad2aa5b96586371121579c8ef75a1903d482a7c1f68534a4231877a24fedaf87a866399e9f2105439f5
7
+ data.tar.gz: c5803e3626a36b8949d6b774e2b43748c27fa9c819a7ac9984c633f613f8b81ad1c9a026c0bafc3f1c66023d75b937675d6196ea01167b47f89ece4c14dde6fe
@@ -1,9 +1,9 @@
1
-
2
-
3
- 1 => ligue1, French Ligue 1
4
- 2 => ligue2, French Ligue 2
5
-
6
-
7
- [2001-02] ## until (including) 2001-02 season
8
- 1 => division1, ? ## use championat or something? check official name
9
- 2 => division2, ?
1
+
2
+
3
+ 1 => ligue1, French Ligue 1
4
+ 2 => ligue2, French Ligue 2
5
+
6
+
7
+ [2001-02] ## until (including) 2001-02 season
8
+ 1 => division1, ? ## use championat or something? check official name
9
+ 2 => division2, ?
@@ -1,7 +1,7 @@
1
-
2
-
3
- 1 => superleague, Greek Superleague
4
-
5
-
6
- [2005-06] ## until (including) 2005-06 season
7
- 1 => alphaethniki, Greek Alpha Ethniki
1
+
2
+
3
+ 1 => superleague, Greek Superleague
4
+
5
+
6
+ [2005-06] ## until (including) 2005-06 season
7
+ 1 => alphaethniki, Greek Alpha Ethniki
@@ -1,19 +1,19 @@
1
-
2
- 1 => premiership, Scotish Premiership # starting w/ 2013-14 season
3
- 2 => championship, Scotish Championship
4
- 3 => league1, Scotish League One
5
- 4 => league2, Scotish League Two
6
-
7
-
8
- [2012-13] ## until (including) 2012-13 season
9
- 1 => premierleague, Scotish Premiership
10
- 2 => division1, Scotish 1st Division
11
- 3 => division2, Scotish League One
12
- 4 => division3, Scotish League Two
13
-
14
-
15
- [1997-98] ## until (including) season
16
- 1 => premierdivision, Scotish Premier Division
17
- 2 => division1, Scotish 1st Division
18
- 3 => division2, Scotish 2nd Division
19
- 4 => division3, Scotish 3rd Division
1
+
2
+ 1 => premiership, Scotish Premiership # starting w/ 2013-14 season
3
+ 2 => championship, Scotish Championship
4
+ 3 => league1, Scotish League One
5
+ 4 => league2, Scotish League Two
6
+
7
+
8
+ [2012-13] ## until (including) 2012-13 season
9
+ 1 => premierleague, Scotish Premiership
10
+ 2 => division1, Scotish 1st Division
11
+ 3 => division2, Scotish League One
12
+ 4 => division3, Scotish League Two
13
+
14
+
15
+ [1997-98] ## until (including) 1997-98 season
16
+ 1 => premierdivision, Scotish Premier Division
17
+ 2 => division1, Scotish 1st Division
18
+ 3 => division2, Scotish 2nd Division
19
+ 4 => division3, Scotish 3rd Division
@@ -1,162 +1,162 @@
1
- ##
2
- # what name? use regions or maps or geos or zones or __?
3
-
4
- #
5
- # england
6
- # see https://en.wikipedia.org/wiki/Subdivisions_of_England
7
- # see https://www.bbc.co.uk/news/england/regions
8
-
9
- ##
10
- # todo: for sort order - allow different sort name
11
- # e.g. East Sussex => Sussex (East) or Sussex, East or something
12
- # use <> for marking what counts for sorting
13
- # e.g East <Sussex> => get auto-converted to Sussex, East - why? why not?
14
- #
15
- # more examples:
16
- # North ‹Yorkshire› => Yorkshire (North) or Yorkshire, North
17
- # Greater ‹London› => London, Greater
18
- # Greater ‹Manchaster› => Manchaster, Greater
19
-
20
-
21
- ##
22
- # note: uses all regions following the camra good beer guide (book)
23
- #
24
-
25
- ======================================
26
- == North West England ==
27
-
28
- # Cumbria
29
- # Lancashire
30
- # Liverpool
31
- # Manchester
32
-
33
- Cheshire
34
- Cumbria
35
- Lancashire
36
- Greater Manchester | Manchester, Greater
37
- Liverpool & Merseyside | Merseyside ## note: added Liverpool & - why? why not?
38
-
39
-
40
- ===================================
41
- == North East England
42
-
43
- # Tees
44
- # Tyne & Wear
45
-
46
- Durham
47
- Newcastle & Northumberland | Northumberland ## note: added Newcastle & - why? why not?
48
- Tyne and Wear
49
-
50
-
51
- =====================================
52
- == Yorkshire & Lincolnshire ==
53
-
54
- # Humberside
55
- # Leeds & West Yorkshire
56
- # Lincolnshire
57
- # Sheffield & South Yorkshire
58
- # York & North Yorkshire
59
-
60
- Lincolnshire
61
- West Yorkshire | Yorkshire, West | Yorkshire (West)
62
- South Yorkshire | Yorkshire, South | Yorkshire (South)
63
- North Yorkshire | Yorkshire, North | Yorkshire (North)
64
- East Yorkshire | Yorkshire, East | Yorkshire (East)
65
-
66
-
67
- ==================================
68
- == West Midlands
69
-
70
- # Birmingham & Black Country
71
- # Coventry & Warwickshire
72
- # Hereford & Worcester
73
- # Shropshire
74
- # Stoke & Staffordshire
75
-
76
- Herefordshire
77
- Shropshire
78
- Staffordshire
79
- Warwickshire
80
- Birmingham & West Midlands | West Midlands ## note: added Birmingham - why? why not?
81
- Worcestershire
82
-
83
-
84
- ==================================
85
- == East Midlands
86
-
87
- # Derby
88
- # Leicester
89
- # Northampton
90
- # Nottingham
91
-
92
- Derbyshire
93
- Leicestershire
94
- Northamptonshire
95
- Nottinghamshire
96
- Rutland
97
-
98
-
99
- ===================================
100
- == West & South West
101
-
102
- # Bristol
103
- # Cornwall
104
- # Devon
105
- # Gloucestershire
106
- # Somerset
107
- # Wiltshire
108
-
109
-
110
- Cornwall
111
- Devon
112
- Bristol & Gloucestershire | Gloucestershire
113
- Somerset
114
- Wiltshire
115
-
116
-
117
- =================================
118
- == East
119
-
120
- # Beds, Herts & Bucks
121
- # Cambridgeshire
122
- # Essex
123
- # Norfolk
124
- # Suffolk
125
-
126
- Bedfordshire
127
- Hertfordshire
128
- Buckinghamshire
129
- Cambridgeshire
130
- Essex
131
- Norfolk
132
- Suffolk
133
-
134
-
135
- ===============================
136
- == South
137
-
138
- # Berkshire
139
- # Dorset
140
- # Hampshire & Isle of Wight
141
- # Oxford
142
-
143
- Berkshire
144
- Dorset
145
- Hampshire
146
- Oxfordshire
147
-
148
-
149
-
150
- ===============================
151
- == London & South East
152
-
153
- # Kent
154
- # London
155
- # Surrey
156
- # Sussex
157
-
158
- Greater London | London, Greater
159
- Kent
160
- Surrey
161
- East Sussex | Sussex, East | Sussex (East)
162
- West Sussex | Sussex, West | Sussex (West)
1
+ ##
2
+ # what name? use regions or maps or geos or zones or __?
3
+
4
+ #
5
+ # england
6
+ # see https://en.wikipedia.org/wiki/Subdivisions_of_England
7
+ # see https://www.bbc.co.uk/news/england/regions
8
+
9
+ ##
10
+ # todo: for sort order - allow different sort name
11
+ # e.g. East Sussex => Sussex (East) or Sussex, East or something
12
+ # use <> for marking what counts for sorting
13
+ # e.g East <Sussex> => get auto-converted to Sussex, East - why? why not?
14
+ #
15
+ # more examples:
16
+ # North ‹Yorkshire› => Yorkshire (North) or Yorkshire, North
17
+ # Greater ‹London› => London, Greater
18
+ # Greater ‹Manchester› => Manchester, Greater
19
+
20
+
21
+ ##
22
+ # note: uses all regions following the camra good beer guide (book)
23
+ #
24
+
25
+ ======================================
26
+ == North West England ==
27
+
28
+ # Cumbria
29
+ # Lancashire
30
+ # Liverpool
31
+ # Manchester
32
+
33
+ Cheshire
34
+ Cumbria
35
+ Lancashire
36
+ Greater Manchester | Manchester, Greater
37
+ Liverpool & Merseyside | Merseyside ## note: added Liverpool & - why? why not?
38
+
39
+
40
+ ===================================
41
+ == North East England
42
+
43
+ # Tees
44
+ # Tyne & Wear
45
+
46
+ Durham
47
+ Newcastle & Northumberland | Northumberland ## note: added Newcastle & - why? why not?
48
+ Tyne and Wear
49
+
50
+
51
+ =====================================
52
+ == Yorkshire & Lincolnshire ==
53
+
54
+ # Humberside
55
+ # Leeds & West Yorkshire
56
+ # Lincolnshire
57
+ # Sheffield & South Yorkshire
58
+ # York & North Yorkshire
59
+
60
+ Lincolnshire
61
+ West Yorkshire | Yorkshire, West | Yorkshire (West)
62
+ South Yorkshire | Yorkshire, South | Yorkshire (South)
63
+ North Yorkshire | Yorkshire, North | Yorkshire (North)
64
+ East Yorkshire | Yorkshire, East | Yorkshire (East)
65
+
66
+
67
+ ==================================
68
+ == West Midlands
69
+
70
+ # Birmingham & Black Country
71
+ # Coventry & Warwickshire
72
+ # Hereford & Worcester
73
+ # Shropshire
74
+ # Stoke & Staffordshire
75
+
76
+ Herefordshire
77
+ Shropshire
78
+ Staffordshire
79
+ Warwickshire
80
+ Birmingham & West Midlands | West Midlands ## note: added Birmingham - why? why not?
81
+ Worcestershire
82
+
83
+
84
+ ==================================
85
+ == East Midlands
86
+
87
+ # Derby
88
+ # Leicester
89
+ # Northampton
90
+ # Nottingham
91
+
92
+ Derbyshire
93
+ Leicestershire
94
+ Northamptonshire
95
+ Nottinghamshire
96
+ Rutland
97
+
98
+
99
+ ===================================
100
+ == West & South West
101
+
102
+ # Bristol
103
+ # Cornwall
104
+ # Devon
105
+ # Gloucestershire
106
+ # Somerset
107
+ # Wiltshire
108
+
109
+
110
+ Cornwall
111
+ Devon
112
+ Bristol & Gloucestershire | Gloucestershire
113
+ Somerset
114
+ Wiltshire
115
+
116
+
117
+ =================================
118
+ == East
119
+
120
+ # Beds, Herts & Bucks
121
+ # Cambridgeshire
122
+ # Essex
123
+ # Norfolk
124
+ # Suffolk
125
+
126
+ Bedfordshire
127
+ Hertfordshire
128
+ Buckinghamshire
129
+ Cambridgeshire
130
+ Essex
131
+ Norfolk
132
+ Suffolk
133
+
134
+
135
+ ===============================
136
+ == South
137
+
138
+ # Berkshire
139
+ # Dorset
140
+ # Hampshire & Isle of Wight
141
+ # Oxford
142
+
143
+ Berkshire
144
+ Dorset
145
+ Hampshire
146
+ Oxfordshire
147
+
148
+
149
+
150
+ ===============================
151
+ == London & South East
152
+
153
+ # Kent
154
+ # London
155
+ # Surrey
156
+ # Sussex
157
+
158
+ Greater London | London, Greater
159
+ Kent
160
+ Surrey
161
+ East Sussex | Sussex, East | Sussex (East)
162
+ West Sussex | Sussex, West | Sussex (West)
@@ -1,278 +1,278 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class ClubReader
9
-
10
-
11
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
- txt = File.open( path, 'r:utf-8' ).read
13
- parse( txt )
14
- end
15
-
16
-
17
- def self.parse( txt )
18
- recs = []
19
- last_rec = nil
20
- headings = [] ## headings stack
21
-
22
- txt.each_line do |line|
23
- line = line.strip
24
-
25
- next if line.empty?
26
- next if line.start_with?( '#' ) ## skip comments too
27
-
28
- ## strip inline (until end-of-line) comments too
29
- ## e.g Eupen => KAS Eupen, ## [de]
30
- ## => Eupen => KAS Eupen,
31
- line = line.sub( /#.*/, '' ).strip
32
- pp line
33
-
34
-
35
- next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
36
-
37
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
38
- ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
39
- if line =~ /^(={1,}) ## leading ======
40
- ([^=]+?) ## text (note: for now no "inline" = allowed)
41
- =* ## (optional) trailing ====
42
- $/x
43
- heading_marker = $1
44
- heading_level = $1.length ## count number of = for heading level
45
- heading = $2.strip
46
-
47
- puts "heading #{heading_level} >#{heading}<"
48
-
49
- ## 1) first pop headings if present
50
- while headings.size+1 > heading_level
51
- headings.pop
52
- end
53
-
54
- ## 2) add missing (hierarchy) level if
55
- while headings.size+1 < heading_level
56
- ## todo/fix: issue warning about "skipping" hierarchy level
57
- puts "!!! warn [team reader] - skipping hierarchy level in headings "
58
- headings.push( nil )
59
- end
60
-
61
- if heading =~ /^\?+$/ ## note: use ? or ?? or ?? to reset level to nil
62
- ## keep level empty
63
- else
64
-
65
- ## quick hack: if level is 1 assume country for now
66
- ## and extract country code e.g.
67
- ## Austria (at) => at
68
- ## todo/fix: allow code only e.g. at or aut without enclosing () too - why? why not?
69
- if heading_level == 1
70
- if heading =~ /\(([a-z]{2,3})\)/i ## note allow (at) or (AUT) too
71
- country_code = $1
72
-
73
- ## check country code - MUST exist for now!!!!
74
- country = SportDb::Import.config.countries[ country_code ]
75
- if country.nil?
76
- puts "!!! error [team reader] - unknown country with code >#{country_code}< - sorry - add country to config to fix"
77
- exit 1
78
- end
79
-
80
- headings.push( country_code )
81
- else
82
- puts "!!! error - heading level 1 - missing country code - >#{heading}<"
83
- exit 1
84
- end
85
- else
86
- ## quick hack:
87
- ## remove known fill/dummy words incl:
88
- ## Provincia San Juan => San Juan (see argentina, for example)
89
- ##
90
- ## use geo tree long term with alternative names - why? why not?
91
- words = ['Provincia']
92
- words.each { |word| heading = heading.gsub( word, '' ) }
93
- heading = heading.strip
94
-
95
- headings.push( heading )
96
- end
97
-
98
- ## assert that hierarchy level is ok
99
- if headings.size != heading_level
100
- puts "!!! error - headings hierarchy/stack out of order - #{heading.size}<=>#{heading_level}"
101
- exit 1
102
- end
103
- end
104
-
105
- pp headings
106
-
107
- elsif line.start_with?( '|' )
108
- ## assume continuation with line of alternative names
109
- ## note: skip leading pipe
110
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
111
- ## strip and squish (white)spaces
112
- # e.g. New York FC (2011-) => New York FC (2011-)
113
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
114
- last_rec.alt_names += values
115
- last_rec.add_variants( values ) # auto-add (possible) auto-generated variant names
116
-
117
- ## check for duplicates
118
- if last_rec.duplicates?
119
- duplicates = last_rec.duplicates
120
- puts "*** !!! WARN !!! - #{duplicates.size} duplicate alt name mapping(s):"
121
- pp duplicates
122
- pp last_rec
123
- ##
124
- ## todo/fix: make it only an error with exit 1
125
- ## if (not normalized) names are the same (not unique/uniq)
126
- ## e.g. don't exit on A.F.C. == AFC etc.
127
- ## exit 1
128
- end
129
- else
130
- values = line.split( ',' )
131
-
132
- rec = Club.new
133
- value = values.shift ## get first item
134
- ## strip and squish (white)spaces
135
- # e.g. New York FC (2011-) => New York FC (2011-)
136
- value = value.strip.gsub( /[ \t]+/, ' ' )
137
- rec.name = value # canoncial name (global unique "beautiful/long" name)
138
- rec.add_variants( value ) # auto-add (possible) auto-generated variant names
139
-
140
- ## note:
141
- ## check/todo!!!!!!!!!!!!!!!!!-
142
- ## strip year if to present e.g. (2011-)
143
- ##
144
- ## do NOT strip for defunct / historic clubs e.g.
145
- ## (1899-1910)
146
- ## or (-1914) or (-2011) etc.
147
-
148
- ###
149
- ## todo: move year out of canonical team name - why? why not?
150
-
151
- ## check if canonical name include (2011-) or similar in name
152
- ## if yes, remove (2011-) and add to (alt) names
153
- ## e.g. New York FC (2011) => New York FC
154
- if rec.name =~ /\(.+?\)/ ## note: use non-greedy (?) match
155
- name = rec.name.gsub( /\(.+?\)/, '' ).strip
156
-
157
- if rec.name =~ /\(([0-9]{4})-\)/ ## e.g. (2014-)
158
- rec.year = $1.to_i
159
- elsif rec.name =~ /\(-([0-9]{4})\)/ ## e.g. (-2014)
160
- rec.year_end = $1.to_i
161
- elsif rec.name =~ /\(([0-9]{4})-([0-9]{4})\)/ ## e.g. (2011-2014)
162
- rec.year = $1.to_i
163
- rec.year_end = $2.to_i
164
- else
165
- ## todo/check: warn about unknown year format
166
- end
167
- end
168
-
169
- ## todo/check - check for unknown format values
170
- ## e.g. too many values, duplicate years, etc.
171
- ## check for overwritting, etc.
172
- while values.size > 0
173
- value = values.shift
174
- ## strip and squish (white)spaces
175
- # e.g. León › Guanajuato => León › Guanajuato
176
- value = value.strip.gsub( /[ \t]+/, ' ' )
177
- if value =~/^\d{4}$/ # e.g 1904
178
- ## todo/check: issue warning if year is already set!!!!!!!
179
- if rec.year
180
- puts "!!! error - year already set to #{rec.year} - CANNOT overwrite with #{value}:"
181
- pp rec
182
- exit 1
183
- end
184
- rec.year = value.to_i
185
- elsif value.start_with?( '@' ) # e.g. @ Anfield
186
- ## cut-off leading @ and spaces
187
- rec.ground = value[1..-1].strip
188
- else
189
- ## assume city / geo tree
190
- ## split into geo tree
191
- geos = split_geo( value )
192
- city = geos[0]
193
- ## check for "embedded" district e.g. London (Fulham) or Hamburg (St. Pauli) etc.
194
- if city =~ /\((.+?)\)/ ## note: use non-greedy (?) match
195
- rec.district = $1.strip
196
- city = city.gsub( /\(.+?\)/, '' ).strip
197
- end
198
- rec.city = city
199
-
200
- if geos.size > 1
201
- ## cut-off city and keep the rest (of geo tree)
202
- rec.geos = geos[1..-1]
203
- end
204
- end
205
- end ## while values
206
-
207
-
208
- ###############
209
- ## use headings text for geo tree
210
-
211
- ## 1) add country if present
212
- if headings.size > 0 && headings[0]
213
- country = SportDb::Import.config.countries[ headings[0] ]
214
- rec.country = country
215
- else
216
- ## make it an error - why? why not?
217
- puts "!!! error - country missing in headings hierarchy - sorry - add to quicklist"
218
- exit 1
219
- end
220
-
221
- ## 2) check geo tree with headings hierarchy
222
- if headings.size > 1 && headings[1]
223
- geos = split_geo( headings[1] )
224
- if rec.geos
225
- if rec.geos[0] != geos[0]
226
- puts "!!! error - geo tree - headings mismatch >#{rec.geos[0]}< <=> >#{geos[0]}<"
227
- exit 1
228
- end
229
- if rec.geos[1] && rec.geos[1] != geos[1] ## check optional 2nd level too
230
- puts "!!! error - geo tree - headings mismatch >#{rec.geos[1]}< <=> >#{geos[1]}<"
231
- exit 1
232
- end
233
- else
234
- ## add missing region (state/province) from headings hierarchy
235
- rec.geos = geos
236
- end
237
- end
238
-
239
- last_rec = rec
240
-
241
-
242
- ### todo/fix:
243
- ## auto-add alt name with dots stripped - why? why not?
244
- ## e.g. D.C. United => DC United
245
- ## e.g. Liverpool F.C. => Liverpool FC
246
- ## e.g. St. Albin => St Albin etc.
247
- ## e.g. 1. FC Köln => 1 FC Köln -- make special case for 1. - why? why not?
248
-
249
- ##
250
- ## todo/fix: unify mapping entries
251
- ## always lowercase !!!! (case insensitive)
252
- ## always strip (2011-) !!!
253
- ## always strip dots (e.g. St., F.C, etc.)
254
-
255
- recs << rec
256
- end
257
- end # each_line
258
- recs
259
- end # method read
260
-
261
- ### helpers
262
- def self.split_geo( str )
263
- ## assume city / geo tree
264
- ## strip and squish (white)spaces
265
- # e.g. León › Guanajuato => León › Guanajuato
266
- str = str.strip.gsub( /[ \t]+/, ' ' )
267
-
268
- ## split into geo tree
269
- geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹
270
- geos = geos.map { |geo| geo.strip } ## remove all whitespaces
271
- geos
272
- end
273
-
274
- end # class ClubReader
275
-
276
-
277
- end ## module Import
278
- end ## module SportDb
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class ClubReader
9
+
10
+
11
## Read the club datafile at +path+ (as utf-8 text) and hand the text to parse.
##
## path - file system path of the datafile to read
##
## Returns whatever parse returns for the file's content.
def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
  ## note: use the block form of File.open - the file handle gets closed
  ##   as soon as the block returns (and NOT sometime later by the garbage collector)
  txt = File.open( path, 'r:utf-8' ) { |file| file.read }
  parse( txt )
end
15
+
16
+
17
## Parse the text of a club datafile into club records.
##
## Every line gets stripped first; then (in this order):
##   - empty lines and lines starting with # get skipped
##   - inline (until end-of-line) comments starting with # get cut off
##   - "decorative" lines made up of = only get skipped
##   - heading lines e.g. = Austria (at) =  or  == Wien ==
##       update the headings stack (number of leading = is the level);
##       a level 1 heading MUST include a country code in () that is present in
##       SportDb::Import.config.countries; a heading of ? or ?? leaves the level empty
##   - lines starting with | add more alternative names to the last club record
##   - all other lines start a new club record with comma-separated values:
##       the first value is the name; a four-digit value is the year;
##       a value starting with @ is the ground; any other value is the
##       city with an optional (district) and an optional geo tree
##
## txt - text (string) of the datafile
##
## Returns an array with all club records (in datafile order).
##
## Note: prints (debug) output along the way with pp/puts and
##   terminates the process with exit 1 on format errors.
def self.parse( txt )
  recs     = []
  last_rec = nil
  headings = []   ## headings stack

  txt.each_line do |line|
    line = line.strip

    next if line.empty?
    next if line.start_with?( '#' )   ## skip comments too

    ## strip inline (until end-of-line) comments too
    ##  e.g  Eupen => KAS Eupen,   ## [de]
    ##    => Eupen => KAS Eupen,
    line = line.sub( /#.*/, '' ).strip
    pp line

    next if line =~ /^={1,}$/   ## skip "decorative" only heading e.g. ========

    ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
    ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
    ##   the pattern reads: leading ======, text (for now no "inline" = allowed),
    ##   (optional) trailing ====
    if line =~ /^(={1,})([^=]+?)=*$/
      heading_level = $1.length   ## count number of = for heading level
      heading       = $2.strip

      puts "heading #{heading_level} >#{heading}<"

      ## 1) first pop headings if present
      while headings.size+1 > heading_level
        headings.pop
      end

      ## 2) add missing (hierarchy) level if
      while headings.size+1 < heading_level
        ## todo/fix: issue warning about "skipping" hierarchy level
        puts "!!! warn [team reader] - skipping hierarchy level in headings "
        headings.push( nil )
      end

      if heading =~ /^\?+$/   ## note: use ? or ?? or ??? to reset level to nil
        ## keep level empty
      else
        ## quick hack: if level is 1 assume country for now
        ##   and extract country code e.g.
        ##   Austria (at) => at
        ## todo/fix: allow code only e.g. at or aut without enclosing () too - why? why not?
        if heading_level == 1
          if heading =~ /\(([a-z]{2,3})\)/i   ## note allow (at) or (AUT) too
            country_code = $1

            ## check country code - MUST exist for now!!!!
            country = SportDb::Import.config.countries[ country_code ]
            if country.nil?
              puts "!!! error [team reader] - unknown country with code >#{country_code}< - sorry - add country to config to fix"
              exit 1
            end

            headings.push( country_code )
          else
            puts "!!! error - heading level 1 - missing country code - >#{heading}<"
            exit 1
          end
        else
          ## quick hack:
          ##   remove known fill/dummy words incl:
          ##   Provincia San Juan => San Juan   (see argentina, for example)
          ##
          ##   use geo tree long term with alternative names - why? why not?
          words = ['Provincia']
          words.each { |word| heading = heading.gsub( word, '' ) }
          heading = heading.strip

          headings.push( heading )
        end

        ## assert that hierarchy level is ok
        ##   note: report the size of the headings stack (and NOT the
        ##         length of the heading text)
        if headings.size != heading_level
          puts "!!! error - headings hierarchy/stack out of order - #{headings.size}<=>#{heading_level}"
          exit 1
        end
      end

      pp headings

    elsif line.start_with?( '|' )
      ## assume continuation with line of alternative names

      ## note: a continuation line requires a club record to continue;
      ##   report a proper error (instead of crashing on nil)
      if last_rec.nil?
        puts "!!! error [team reader] - alternative names line without preceding club - >#{line}<"
        exit 1
      end

      ## note: skip leading pipe
      values = line[1..-1].split( '|' )   # team names - allow/use pipe(|)
      ## strip and squish (white)spaces
      #   e.g. New York FC      (2011-)  => New York FC (2011-)
      values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
      last_rec.alt_names += values
      last_rec.add_variants( values )   # auto-add (possible) auto-generated variant names

      ## check for duplicates
      if last_rec.duplicates?
        duplicates = last_rec.duplicates
        puts "*** !!! WARN !!! - #{duplicates.size} duplicate alt name mapping(s):"
        pp duplicates
        pp last_rec
        ##
        ## todo/fix:  make it only an error with exit 1
        ##            if (not normalized) names are the same (not unique/uniq)
        ##              e.g. don't exit on  A.F.C. == AFC etc.
        ## exit 1
      end
    else
      values = line.split( ',' )

      rec   = Club.new
      value = values.shift   ## get first item
      ## strip and squish (white)spaces
      #   e.g. New York FC      (2011-)  => New York FC (2011-)
      value = value.strip.gsub( /[ \t]+/, ' ' )
      rec.name = value            # canonical name (global unique "beautiful/long" name)
      rec.add_variants( value )   # auto-add (possible) auto-generated variant names

      ## todo: move year out of canonical team name - why? why not?
      ##   e.g. strip year if to present e.g. (2011-)
      ##        but do NOT strip for defunct / historic clubs e.g.
      ##        (1899-1910) or (-1914) or (-2011) etc.
      ##
      ## for now the name is kept as is; only the year(s) get picked up
      if rec.name =~ /\(.+?\)/   ## note: use non-greedy (?) match
        if rec.name =~ /\(([0-9]{4})-\)/       ## e.g. (2014-)
          rec.year     = $1.to_i
        elsif rec.name =~ /\(-([0-9]{4})\)/    ## e.g. (-2014)
          rec.year_end = $1.to_i
        elsif rec.name =~ /\(([0-9]{4})-([0-9]{4})\)/   ## e.g. (2011-2014)
          rec.year     = $1.to_i
          rec.year_end = $2.to_i
        else
          ## todo/check: warn about unknown year format
        end
      end

      ## todo/check - check for unknown format values
      ##   e.g. too many values, duplicate years, etc.
      ##        check for overwriting, etc.
      while values.size > 0
        value = values.shift
        ## strip and squish (white)spaces
        #   e.g. León     › Guanajuato  => León › Guanajuato
        value = value.strip.gsub( /[ \t]+/, ' ' )
        if value =~ /^\d{4}$/   # e.g 1904
          ## note: year may already be set from the name e.g. (2011-)
          if rec.year
            puts "!!! error - year already set to #{rec.year} - CANNOT overwrite with #{value}:"
            pp rec
            exit 1
          end
          rec.year = value.to_i
        elsif value.start_with?( '@' )   # e.g. @ Anfield
          ## cut-off leading @ and spaces
          rec.ground = value[1..-1].strip
        else
          ## assume city / geo tree
          ##   split into geo tree
          geos = split_geo( value )
          city = geos[0]
          ## check for "embedded" district e.g. London (Fulham) or Hamburg (St. Pauli) etc.
          if city =~ /\((.+?)\)/   ## note: use non-greedy (?) match
            rec.district = $1.strip
            city = city.gsub( /\(.+?\)/, '' ).strip
          end
          rec.city = city

          if geos.size > 1
            ## cut-off city and keep the rest (of geo tree)
            rec.geos = geos[1..-1]
          end
        end
      end   ## while values

      ###############
      ## use headings text for geo tree

      ## 1) add country if present
      if headings.size > 0 && headings[0]
        country = SportDb::Import.config.countries[ headings[0] ]
        rec.country = country
      else
        ## make it an error - why? why not?
        puts "!!! error - country missing in headings hierarchy - sorry - add to quicklist"
        exit 1
      end

      ## 2) check geo tree with headings hierarchy
      if headings.size > 1 && headings[1]
        geos = split_geo( headings[1] )
        if rec.geos
          if rec.geos[0] != geos[0]
            puts "!!! error - geo tree - headings mismatch >#{rec.geos[0]}< <=> >#{geos[0]}<"
            exit 1
          end
          if rec.geos[1] && rec.geos[1] != geos[1]   ## check optional 2nd level too
            puts "!!! error - geo tree - headings mismatch >#{rec.geos[1]}< <=> >#{geos[1]}<"
            exit 1
          end
        else
          ## add missing region (state/province) from headings hierarchy
          rec.geos = geos
        end
      end

      last_rec = rec

      ### todo/fix:
      ##  auto-add alt name with dots stripped - why? why not?
      ##    e.g. D.C. United      => DC United
      ##    e.g. Liverpool F.C.   => Liverpool FC
      ##    e.g. St. Albin        => St Albin etc.
      ##    e.g. 1. FC Köln       => 1 FC Köln  -- make special case for 1. - why? why not?

      ##
      ## todo/fix: unify mapping entries
      ##   always lowercase !!!!  (case insensitive)
      ##   always strip (2011-) !!!
      ##   always strip dots (e.g. St., F.C, etc.)

      recs << rec
    end
  end   # each_line
  recs
end   # method parse
260
+
261
+ ### helpers
262
## Break up a city / geo tree text into its parts.
##
##   e.g. León     › Guanajuato  => ["León", "Guanajuato"]
##
## str - text with parts separated by > or < or › or ‹
##
## Returns an array of strings with leading and trailing (white)spaces
##   removed from every part.
def self.split_geo( str )
  ## squish first - trim the ends and fold runs of spaces/tabs into a single space
  squished = str.strip.gsub( /[ \t]+/, ' ' )

  ## note: allow > < or › ‹ as separators; trim every part
  squished.split( /[<>‹›]/ ).map( &:strip )
end
273
+
274
+ end # class ClubReader
275
+
276
+
277
+ end ## module Import
278
+ end ## module SportDb