worlddb 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. data/Manifest.txt +0 -2
  2. data/data/africa/countries.txt +63 -73
  3. data/data/america/ca/cities.txt +35 -35
  4. data/data/america/countries.txt +29 -24
  5. data/data/america/mx/cities.txt +18 -18
  6. data/data/america/us/cities.txt +39 -39
  7. data/data/asia/countries.txt +74 -64
  8. data/data/europe/at/cities.txt +31 -31
  9. data/data/europe/at/regions.txt +6 -6
  10. data/data/europe/be/cities.txt +10 -10
  11. data/data/europe/countries.txt +71 -53
  12. data/data/europe/cz/cities.txt +16 -16
  13. data/data/europe/de/cities.txt +28 -28
  14. data/data/europe/en/cities.txt +21 -19
  15. data/data/europe/es/cities.txt +10 -10
  16. data/data/europe/fr/cities.txt +11 -11
  17. data/data/europe/it/cities.txt +14 -14
  18. data/data/europe/lt/cities.txt +2 -3
  19. data/data/europe/lv/cities.txt +3 -2
  20. data/data/europe/nl/cities.txt +3 -3
  21. data/data/europe/pl/cities.txt +3 -3
  22. data/data/europe/ru/cities.txt +5 -5
  23. data/data/europe/sc/cities.txt +1 -1
  24. data/data/europe/ua/cities.txt +5 -5
  25. data/data/europe/wa/cities.txt +1 -1
  26. data/data/oceania/1_codes/fifa.yml +1 -1
  27. data/data/oceania/1_codes/internet.yml +1 -1
  28. data/data/oceania/1_codes/iso3.yml +1 -1
  29. data/data/oceania/3_more/en.wikipedia.yml +1 -1
  30. data/data/oceania/countries.txt +24 -15
  31. data/lib/worlddb/cli/runner.rb +2 -5
  32. data/lib/worlddb/reader.rb +91 -2
  33. data/lib/worlddb/readers/hash_reader.rb +23 -8
  34. data/lib/worlddb/readers/values_reader.rb +86 -8
  35. data/lib/worlddb/version.rb +1 -1
  36. data/lib/worlddb.rb +1 -13
  37. metadata +4 -6
  38. data/data/america/br/cities.txt +0 -9971
  39. data/lib/worlddb/loader.rb +0 -55
@@ -1,16 +1,16 @@
1
1
 
2
2
  ## top 10 cities
3
3
 
4
- madrid, Madrid, region:md, 3_265_038, m:5_427_000
5
- barcelona, Barcelona, region:ct, 1_615_448, m:4_223_000
6
- valencia, Valencia, region:vc, 798_033, m:810_000
7
- sevilla, Sevilla [Seville], region:an, 703_021, m:750_000
8
- zaragoza, Zaragoza, region:ar, 674_725
9
- malaga, Málaga, region:an, 568_030
10
- murcia, Murcia, region:mc, 442_203
11
- palma, Palma de Mallorca, region:ib, 405_318
12
- laspalmas, Las Palmas (de Gran Canaria), region:cn, 383_343
13
- bilbao, Bilbao, region:pv, 352_700, m:750_000
4
+ Madrid, region:md, 3_265_038, m:5_427_000
5
+ Barcelona, region:ct, 1_615_448, m:4_223_000
6
+ Valencia, region:vc, 798_033, m:810_000
7
+ Sevilla [Seville], region:an, 703_021, m:750_000
8
+ Zaragoza, region:ar, 674_725
9
+ Málaga, region:an, 568_030
10
+ Murcia, region:mc, 442_203
11
+ Palma (de Mallorca), region:ib, 405_318
12
+ Las Palmas (de Gran Canaria), region:cn, 383_343
13
+ Bilbao, region:pv, 352_700, m:750_000
14
14
 
15
15
 
16
16
  # more cities here
@@ -1,15 +1,15 @@
1
1
  # top 10 cities
2
2
 
3
- paris, Paris, region:if, 2_234_105, m:10_755_000
4
- marseille, Marseille, region:ac, 850_602, m: 1_582_000
5
- lyon, Lyon, region:ra, 479_803, m: 1_542_000
6
- toulouse, Toulouse, region:mp, 440_204, m: 880_000
7
- nice, Nice, region:ac, 340_735, m: 962_000
8
- nantes, Nantes, region:pl, 282_047
9
- strasbourg, Strasbourg, region:al, 271_708
10
- montpellier, Montpellier, region:lr, 255_080
11
- bordeaux, Bordeaux, region:aq, 236_725, m:845_000
12
- lille, Lille, region:nc, 226_827, m:1_050_000
13
- rennes, Rennes, region:br, 206_604
3
+ Paris, region:if, 2_234_105, m:10_755_000
4
+ Marseille, region:ac, 850_602, m: 1_582_000
5
+ Lyon, region:ra, 479_803, m: 1_542_000
6
+ Toulouse, region:mp, 440_204, m: 880_000
7
+ Nice, region:ac, 340_735, m: 962_000
8
+ Nantes, region:pl, 282_047
9
+ Strasbourg, region:al, 271_708
10
+ Montpellier, region:lr, 255_080
11
+ Bordeaux, region:aq, 236_725, m:845_000
12
+ Lille, region:nc, 226_827, m:1_050_000
13
+ Rennes, region:br, 206_604
14
14
 
15
15
  # more cities
@@ -1,17 +1,17 @@
1
1
 
2
2
 
3
- milano, Milano [Milan], 1_338_436, m:5_232_000
4
- roma, Roma [Rome], 2_777_979, m:3_799_000
5
- napoli, Napoli [Naples], 1_046_987, m:3_726_000
6
- torino, Torino [Turin], 921_485, m:1_499_000
7
- palermo, Palermo, 689_349, m:876_000
8
- genova, Genova [Genoa], 655_704
9
- bologna, Bologna, 385_813
10
- firenze, Firenze [Florence], 381_762, m:821_000
3
+ Milano [Milan], 1_338_436, m:5_232_000
4
+ Roma [Rome], 2_777_979, m:3_799_000
5
+ Napoli [Naples], 1_046_987, m:3_726_000
6
+ Torino [Turin], 921_485, m:1_499_000
7
+ Palermo, 689_349, m:876_000
8
+ Genova [Genoa], 655_704
9
+ Bologna, 385_813
10
+ Firenze [Florence], 381_762, m:821_000
11
11
 
12
- catania, Catania, 341_685
13
- bari, Bari, 335_647
14
- venezia, Venezia [Venice], 297_743
15
- messina, Messina, 262_524
16
- verona, Verona, 254_146
17
- trieste, Trieste, 222_589
12
+ Catania, 341_685
13
+ Bari, 335_647
14
+ Venezia [Venice], 297_743
15
+ Messina, 262_524
16
+ Verona, 254_146
17
+ Trieste, 222_589
@@ -1,4 +1,3 @@
1
- ## Latvia cities
2
-
3
- riga, Riga, 699_203, m:1_018_295
1
+ ## Lithuania Cities (lt)
4
2
 
3
+ Vilnius, 554_060, m:838_852
@@ -1,3 +1,4 @@
1
- ## Lithuania Cities
1
+ ## Latvia cities (lv)
2
+
3
+ Riga, 699_203, m:1_018_295
2
4
 
3
- vilnius, Vilnius, 554_060, m:838_852
@@ -1,11 +1,11 @@
1
1
 
2
2
  # metros
3
3
 
4
- rotterdamthehague, Rotterdam-The Hague, m:2_113_000
4
+ Rotterdam-The Hague, m:2_113_000
5
5
 
6
6
 
7
7
  # cities
8
8
 
9
- amsterdam, Amsterdam, m:1_050_000
9
+ Amsterdam, m:1_050_000
10
10
 
11
- alkmaar, Alkmaar, ## region: North Holland
11
+ Alkmaar ## region: North Holland
@@ -6,7 +6,7 @@ gdansktri, Gdańsk (Tricity), m:775_000, metro # includes Gdańsk, Gdynia
6
6
 
7
7
  # cities
8
8
 
9
- warszawa, Warszawa [Warsaw], 1_720_398, m:1_713_000
10
- lodz, Łódź, m: 907_000
11
- krakow, Kraków, m: 760_000
9
+ Warszawa [Warsaw], 1_720_398, m:1_713_000
10
+ Łódź, m: 907_000
11
+ Kraków, m: 760_000
12
12
 
@@ -1,11 +1,11 @@
1
1
 
2
- moskva, Moskva [Moscow], 11_689_048, m:15_512_000
3
- stpetersburg, St. Petersburg, 4_879_566, m:4_879_000
2
+ Moskva [Moscow], 11_689_048, m:15_512_000
3
+ St. Petersburg, 4_879_566, m:4_879_000
4
4
 
5
- novgorod, Nizhni Novgorod, 1_250_615, m:1_248_000
5
+ Novgorod|Nizhni Novgorod, 1_250_615, m:1_248_000
6
6
 
7
- samara, Samaram, 1_164_896, m:1_163_000
8
- kazan, Kazan, 1_143_546, m:1_141_000
7
+ Samaram, 1_164_896, m:1_163_000
8
+ Kazan, 1_143_546, m:1_141_000
9
9
 
10
10
  # Yekaterinburg 1_350_136 ??
11
11
  # Chelyabinsk 1_130_273 ??
@@ -1,3 +1,3 @@
1
1
 
2
- glasgow, Glasgow, m:1_201_000
2
+ Glasgow, m:1_201_000
3
3
 
@@ -1,9 +1,9 @@
1
1
 
2
- kiev, Kiew|Kiev|Kyiv, 2_785_100, m:2_812_000
3
- kharkov, Kharkiv|Kharkov, 1_470_000, m:1_449_000
4
- donetsk, Donezk|Donetsk, m:963_000
2
+ Kiev|Kiew|Kyiv, 2_785_100, m:2_812_000
3
+ Kharkov|Kharkiv, 1_470_000, m:1_449_000
4
+ Donetsk|Donezk, m:963_000
5
5
 
6
6
 
7
- dnepropetrovsk, Dnepropetrovsk, 1_007_200, m:1_000_000
8
- odessa, Odessa, 1_003_705, m:1_010_000
7
+ Dnepropetrovsk, 1_007_200, m:1_000_000
8
+ Odessa, 1_003_705, m:1_010_000
9
9
 
@@ -1,3 +1,3 @@
1
1
 
2
- swansea, Swansea, 239000
2
+ Swansea, 239000
3
3
 
@@ -13,7 +13,7 @@ pg: PNG # Papua New Guinea
13
13
  ####
14
14
  ## not members - double check if mistaken
15
15
 
16
- kl: !!null # Kiribati
16
+ ki: !!null # Kiribati
17
17
  mh: !!null # Marshall Islands
18
18
  fm: !!null # Micronesia
19
19
  nr: !!null # Nauru
@@ -2,7 +2,7 @@
2
2
  ## oceania
3
3
 
4
4
  fj: fj # Fiji
5
- kl: kl # Kiribati
5
+ ki: ki # Kiribati
6
6
  mh: mh # Marshall Islands
7
7
  fm: fm # Micronesia
8
8
  nr: nr # Nauru
@@ -2,7 +2,7 @@
2
2
  ## oceania
3
3
 
4
4
  fj: FJI # Fiji
5
- kl: KIR # Kiribati
5
+ ki: KIR # Kiribati
6
6
  mh: MHL # Marshall Islands
7
7
  fm: FSM # Micronesia
8
8
  nr: NRU # Nauru
@@ -1,5 +1,5 @@
1
1
 
2
- kl: Kiribati
2
+ ki: Kiribati
3
3
  mh: Marshall_Islands
4
4
  fm: Federated_States_of_Micronesia
5
5
  nr: Nauru
@@ -8,24 +8,33 @@
8
8
  ## - Polynesia
9
9
 
10
10
 
11
- fj, Fiji, FIJ, 18_274, 849_000, Suva, un|fifa
12
- kl, Kiribati, KIR, 811, 103_500, Tarawa, un # nb: check code; NOT fifa member
13
- mh, Marshall Islands, MHI, 181, 68_000, Majuro, un # nb: check code; NOT fifa member
14
- fm, Micronesia, FSM, 702, 111_000, Palikir, un
15
- nr, Nauru, NAU, 21, 9_378, Yaren, un
16
- pw, Palau, PWA, 459, 20_956, Ngerulmud, un # check code
17
- ws, Samoa, SAM, 2_831, 194_320, Apia, un|fifa
18
- sb, Solomon Islands, SOL, 28_400, 523_000, Honiara, un|fifa
19
- to, Tonga, TGA, 748, 103_036, Nuku'alofa, un|fifa
20
- tv, Tuvalu, TUV, 26, 10_544, Funafuti, un
21
- vu, Vanuatu, VAN, 12_190, 224_564, Port Vila, un|fifa
11
+ au, Australia, AUS, 7_686_850, 22_028_000, un|fifa|en|g20|commonwealth
22
12
 
13
+ ##############
14
+ ### Melanesia
15
+ ## see http://en.wikipedia.org/wiki/Melanesia
16
+
17
+ pg, Papua New Guinea, PNG, 462_840, 5_172_033, un|fifa|melanesia|commonwealth
18
+ fj, Fiji, FIJ, 18_274, 849_000, Suva, un|fifa|melanesia|commonwealth
19
+ sb, Solomon Islands, SOL, 28_400, 523_000, Honiara, un|fifa|melanesia|commonwealth
20
+ vu, Vanuatu, VAN, 12_190, 224_564, Port Vila, un|fifa|melanesia|commonwealth
23
21
 
24
22
 
25
- au, Australia, AUS, 7_686_850, 22_028_000, un|fifa|en|g20
26
- nz, New Zealand, NZL, 268_680, 4_108_037, un|fifa|en
23
+ #############
24
+ ## Micronesia
25
+ ## see http://en.wikipedia.org/wiki/Micronesia
26
+
27
+ pw, Palau, PLW, 459, 20_956, Ngerulmud, un|micronesia|microstate # nb: use ISO code; NOT fifa member
28
+ fm, Micronesia, FSM, 702, 111_000, Palikir, un|micronesia|microstate # nb: use ISO code; NOT fifa member
29
+ mh, Marshall Islands, MHL, 181, 68_000, Majuro, un|micronesia|microstate # nb: use ISO code; NOT fifa member
30
+ nr, Nauru, NRU, 21, 9_378, Yaren, un|micronesia|microstate|commonwealth # nb: use ISO code; NOT fifa member
31
+ ki, Kiribati, KIR, 811, 103_500, Tarawa, un|micronesia|microstate|commonwealth # nb: use ISO code; NOT fifa member
27
32
 
28
33
  ##############
29
- ### Melanesia
34
+ ## Polynesia
35
+ ## see http://en.wikipedia.org/wiki/Polynesia
30
36
 
31
- pg, Papua New Guinea, PNG, 462_840, 5_172_033, un|fifa
37
+ nz, New Zealand, NZL, 268_680, 4_108_037, un|fifa|en|polynesia|commonwealth
38
+ ws, Samoa, SAM, 2_831, 194_320, Apia, un|fifa|polynesia|commonwealth
39
+ to, Tonga, TGA, 748, 103_036, Nuku'alofa, un|fifa|polynesia|microstate|commonwealth
40
+ tv, Tuvalu, TUV, 26, 10_544, Funafuti, un|polynesia|microstate|commonwealth # nb: use ISO code; NOT fifa member
@@ -99,11 +99,8 @@ EOS
99
99
  WorldDB.create if opts.create?
100
100
  WorldDB.delete! if opts.delete?
101
101
 
102
- if opts.countries? || opts.regions? || opts.cities?
103
- Reader.new( logger ).run( opts, args ) # load/read plain text country/region/city fixtures
104
- else
105
- Loader.new( logger ).run( opts, args ) # load ruby fixtures
106
- end
102
+ # read plain text country/region/city fixtures
103
+ Reader.new( logger ).run( opts, args )
107
104
  end
108
105
 
109
106
  WorldDB.stats
@@ -146,6 +146,7 @@ private
146
146
 
147
147
  value_numbers = []
148
148
  value_tag_keys = []
149
+ value_cities = []
149
150
 
150
151
  ### check for "default" tags - that is, if present attribs[:tags] remove from hash
151
152
 
@@ -183,6 +184,8 @@ private
183
184
  elsif value =~ /^supra$/ ## supra(national)
184
185
  attribs[ :c ] = false # turn off default c|country flag; make it s|supra only
185
186
  attribs[ :s ] = true
187
+ ## auto-add tag supra
188
+ value_tag_keys << 'supra'
186
189
  elsif value =~ /^supra:/ ## supra:
187
190
  value_country_key = value[6..-1] ## cut off supra: prefix
188
191
  value_country = Country.find_by_key!( value_country_key )
@@ -192,7 +195,9 @@ private
192
195
  value_country = Country.find_by_key!( value_country_key )
193
196
  attribs[ :country_id ] = value_country.id
194
197
  attribs[ :c ] = false # turn off default c|country flag; make it d|depend only
195
- attribs[ :d ] = true
198
+ attribs[ :d ] = true
199
+ ## auto-add tag territory
200
+ value_tag_keys << 'territory' # rename tag to dependency? why? why not?
196
201
  elsif value =~ /^metro:/ ## metro:
197
202
  value_city_key = value[6..-1] ## cut off metro: prefix
198
203
  value_city = City.find_by_key!( value_city_key )
@@ -227,10 +232,18 @@ private
227
232
 
228
233
  value_tag_keys += tag_keys
229
234
  else
235
+
236
+ if clazz == Country || clazz == Region
237
+ ### assume it is the capital city - mark it for auto add
238
+ value_cities << value
239
+ next
240
+ end
241
+
230
242
  # issue warning: unknown type for value
231
243
  puts "!!!! >>>> warning: unknown type for value >#{value}<"
232
244
  end
233
- end
245
+ end # each value
246
+
234
247
 
235
248
  if value_numbers.size > 0
236
249
  if clazz == City
@@ -239,6 +252,43 @@ private
239
252
  else # countries,regions
240
253
  attribs[ :area ] = value_numbers[0]
241
254
  attribs[ :pop ] = value_numbers[1]
255
+
256
+ if clazz == Country
257
+ # auto-add tags
258
+ area = value_numbers[0]
259
+ pop = value_numbers[1]
260
+
261
+ # categorize into brackets
262
+ if area >= 1_000_000
263
+ value_tag_keys << 'area_1_000_000_n_up'
264
+ elsif area >= 100_000
265
+ value_tag_keys << 'area_1_000_000_n_100_000'
266
+ elsif area >= 1000
267
+ value_tag_keys << 'area_100_000_n_1_000'
268
+ else
269
+ value_tag_keys << 'area_1_000_n_less' # microstate
270
+ end
271
+
272
+ # include all
273
+ value_tag_keys << 'area_100_000_n_up' if area >= 100_000
274
+ value_tag_keys << 'area_1_000_n_up' if area >= 1_000
275
+
276
+
277
+ # categorize into brackets
278
+ if pop >= 100_000_000
279
+ value_tag_keys << 'pop_100m_n_up'
280
+ elsif pop >= 10_000_000
281
+ value_tag_keys << 'pop_100m_n_10m'
282
+ elsif pop >= 1_000_000
283
+ value_tag_keys << 'pop_10m_n_1m'
284
+ else
285
+ value_tag_keys << 'pop_1m_n_less'
286
+ end
287
+
288
+ # include all
289
+ value_tag_keys << 'pop_10m_n_up' if pop >= 10_000_000
290
+ value_tag_keys << 'pop_1m_n_up' if pop >= 1_000_000
291
+ end
242
292
  end
243
293
  end
244
294
 
@@ -262,6 +312,45 @@ private
262
312
  puts attribs.to_json
263
313
 
264
314
  rec.update_attributes!( attribs )
315
+
316
+ #################
317
+ ## auto add capital cities
318
+
319
+ value_cities.each do |city_title|
320
+
321
+ city_attribs = {}
322
+ city_key = reader.title_to_key( city_title )
323
+
324
+ ## check if it exists
325
+ ## todo/fix: add country_id for lookup?
326
+ city = City.find_by_key( city_key )
327
+ if city.present?
328
+ puts "*** update city #{city.id}-#{city.key}:"
329
+ else
330
+ puts "*** create city:"
331
+ city = City.new
332
+ city_attribs[ :key ] = city_key
333
+ end
334
+
335
+ city_attribs[ :title ] = city_title
336
+
337
+ if clazz == Country
338
+ city_attribs[ :country_id ] = rec.id
339
+ elsif clazz == Region
340
+ city_attribs[ :region_id ] = rec.id
341
+ city_attribs[ :country_id ] = rec.country_id
342
+ else
343
+ ## issue warning: unknown type for city!!!
344
+ end
345
+
346
+ puts city_attribs.to_json
347
+
348
+ city.update_attributes!( city_attribs )
349
+
350
+ ### todo/fix: add capital ref to country/region
351
+
352
+ end # each city
353
+
265
354
 
266
355
  ##################
267
356
  ## add taggings
@@ -17,11 +17,33 @@ class HashReader
17
17
  ## - see worlddb/utils.rb
18
18
 
19
19
  text = File.read_utf8( @path )
20
+
20
21
  ### hack for syck yaml parser (e.g.ruby 1.9.2) (cannot handle !!null)
21
22
  ## change it to !null to get plain nil
22
23
  ## w/ both syck and psych/libyml
23
-
24
+
24
25
  text = text.gsub( '!!null', '!null' )
26
+
27
+ ### hacks for yaml
28
+
29
+ ### see yaml gotchas
30
+ ## - http://www.perlmonks.org/?node_id=738671
31
+ ## -
32
+
33
+ ## replace all tabs w/ two spaces and issue a warning
34
+ ## nb: yaml does NOT support tabs see why here -> yaml.org/faq.html
35
+
36
+ text = text.gsub( "\t" ) do |_|
37
+ puts "*** warn: hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html"
38
+ ' ' # replace w/ two spaces
39
+ end
40
+
41
+ ## quote implicit boolean types on,no,n,y
42
+
43
+ text = text.gsub( /\b(ON|On|on|NO|No|no|N|n|Y|y)\b/ ) do |value|
44
+ puts "*** warn: hash reader - found implicit bool (#{$1}); adding quotes to turn into string; see yaml.org/refcard.html"
45
+ "'#{$1}'" # add quotes to turn it into a string (not bool e.g. true|false)
46
+ end
25
47
 
26
48
  @hash = YAML.load( text )
27
49
  end
@@ -36,13 +58,6 @@ class HashReader
36
58
  key = key_wild.to_s.strip
37
59
  value = value_wild.to_s.strip
38
60
 
39
- ### hack - hack - hack -change
40
- ## no: in yml becomes false !!! check how to escape!
41
-
42
- key = 'no' if key == 'false'
43
- value = 'no' if value == 'false'
44
- ### todo: issue warnings
45
-
46
61
  puts ">>#{key}<< >>#{value}<<"
47
62
 
48
63
  yield( key, value )
@@ -68,26 +68,104 @@ class ValuesReader
68
68
  end
69
69
 
70
70
  puts " values: >>#{values.join('<< >>')}<<"
71
-
72
- attribs = {
73
- key: values[0]
74
- }
75
71
 
72
+
73
+ ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
74
+ ## either use keys or do NOT use keys; do NOT mix in a single fixture file
75
+
76
+
77
+ ### support autogenerate key from first title value
78
+ if values[0] =~ /^[a-z]{2,}$/ # if it looks like a key (only a-z lower case allowed); assume it's a key
79
+ key_col = values[0]
80
+ title_col = values[1]
81
+ more_cols = values[2..-1]
82
+ else
83
+ key_col = '<auto>'
84
+ title_col = values[0]
85
+ more_cols = values[1..-1]
86
+ end
87
+
88
+ attribs = {}
89
+
76
90
  ## title (split of optional synonyms)
77
91
  # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
78
- titles = values[1].split('|')
92
+ titles = title_col.split('|')
79
93
 
80
94
  attribs[ :title ] = titles[0]
81
- ## add optional synonyms
95
+
96
+ ## add optional synonyms if present
82
97
  attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
83
98
 
99
+ if key_col == '<auto>'
100
+ ## autogenerate key from first title
101
+ key_col = title_to_key( titles[0] )
102
+ puts " autogen key >#{key_col}< from title >#{titles[0]}<"
103
+ end
104
+
105
+ attribs[ :key ] = key_col
106
+
84
107
  attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
85
108
 
86
- yield( attribs, values[2..-1] )
109
+ yield( attribs, more_cols )
87
110
 
88
111
  end # each lines
89
112
 
90
113
  end # method each_line
91
114
 
92
- end # class ValuesReader
115
+
116
+
117
+ def title_to_key( title )
93
118
 
119
+ ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
120
+ key = title.downcase
121
+
122
+ ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
123
+ key = key.gsub( /\[.+\]/, '' )
124
+
125
+ ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
126
+ key = key.gsub( /\(.+\)/, '' )
127
+
128
+ ## remove all whitespace and punctuation
129
+ key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
130
+
131
+ ## turn accented char into ascii look alike if possible
132
+ ##
133
+ ## todo: add some more
134
+ ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
135
+
136
+ alternatives = [
137
+ ['ß', 'ss'],
138
+ ['æ', 'ae'],
139
+ ['ä', 'ae'],
140
+ ['á', 'a' ], # e.g. Bogotá, Králové
141
+ ['ã', 'a' ], # e.g São Paulo
142
+ ['ă', 'a' ], # e.g. Chișinău
143
+ ['é', 'e' ], # e.g. Vélez, Králové
144
+ ['è', 'e' ], # e.g. Rivières
145
+ ['ê', 'e' ], # e.g. Grêmio
146
+ ['ě', 'e' ], # e.g. Budějovice
147
+ ['ì', 'i' ], # e.g. Potosì
148
+ ['í', 'i' ], # e.g. Ústí
149
+ ['ñ', 'n' ], # e.g. Porteño
150
+ ['ň', 'n' ], # e.g. Plzeň, Třeboň
151
+ ['ö', 'oe'],
152
+ ['ó', 'o' ], # e.g. Colón, Łódź, Kraków
153
+ ['ř', 'r' ], # e.g. Třeboň
154
+ ['ș', 's' ], # e.g. Chișinău
155
+ ['ü', 'ue'],
156
+ ['ú', 'u' ], # e.g. Fútbol
157
+ ['ź', 'z' ], # e.g. Łódź
158
+ ['Č', 'c' ], # e.g. České
159
+ ['Ł', 'l' ], # e.g. Łódź
160
+ ['Ú', 'u' ], # e.g. Ústí
161
+ ]
162
+
163
+ alternatives.each do |alt|
164
+ key = key.gsub( alt[0], alt[1] )
165
+ end
166
+
167
+ key
168
+ end # method title_to_key
169
+
170
+
171
+ end # class ValuesReader
@@ -1,5 +1,5 @@
1
1
 
2
2
  module WorldDB
3
- VERSION = '0.7.0'
3
+ VERSION = '0.7.1'
4
4
  end
5
5
 
data/lib/worlddb.rb CHANGED
@@ -35,7 +35,6 @@ require 'worlddb/readers/line_reader'
35
35
  require 'worlddb/readers/values_reader'
36
36
  require 'worlddb/readers/hash_reader'
37
37
  require 'worlddb/reader'
38
- require 'worlddb/loader'
39
38
  require 'worlddb/cli/opts'
40
39
  require 'worlddb/cli/runner'
41
40
 
@@ -62,16 +61,6 @@ module WorldDB
62
61
  CreateDB.up
63
62
  end
64
63
 
65
- # load built-in (that is, bundled within the gem) named seeds
66
- # - pass in an array of seed names e.g. [ 'countries', 'at/cities', 'de/cities' ] etc.
67
-
68
- def self.load( ary )
69
- loader = Loader.new
70
- ary.each do |name|
71
- loader.load_fixtures_builtin( name )
72
- end
73
- end
74
-
75
64
  def self.fixtures # all builtin fixtures; helper for covenience
76
65
  africa_fixtures +
77
66
  america_fixtures +
@@ -94,7 +83,6 @@ module WorldDB
94
83
  '1_codes/iso3',
95
84
  '1_codes/motor',
96
85
  'br/regions',
97
- 'br/cities',
98
86
  'ca/regions',
99
87
  'ca/cities',
100
88
  'mx/regions',
@@ -169,6 +157,7 @@ module WorldDB
169
157
  'au/cities'].map { |path| "oceania/#{path}" }
170
158
  end
171
159
 
160
+ ## todo/fix: rename to load/load_all - why? why not?? or just add an alias?
172
161
 
173
162
  def self.read( ary )
174
163
  reader = Reader.new
@@ -183,7 +172,6 @@ module WorldDB
183
172
  # too big for heroku free db plan (10,000 record limit)
184
173
  # - sorry, can't load by default
185
174
  fixture_excludes = [
186
- 'america/br/cities',
187
175
  'america/ve/cities'
188
176
  ]
189
177