worlddb 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/Manifest.txt +0 -2
  2. data/data/africa/countries.txt +63 -73
  3. data/data/america/ca/cities.txt +35 -35
  4. data/data/america/countries.txt +29 -24
  5. data/data/america/mx/cities.txt +18 -18
  6. data/data/america/us/cities.txt +39 -39
  7. data/data/asia/countries.txt +74 -64
  8. data/data/europe/at/cities.txt +31 -31
  9. data/data/europe/at/regions.txt +6 -6
  10. data/data/europe/be/cities.txt +10 -10
  11. data/data/europe/countries.txt +71 -53
  12. data/data/europe/cz/cities.txt +16 -16
  13. data/data/europe/de/cities.txt +28 -28
  14. data/data/europe/en/cities.txt +21 -19
  15. data/data/europe/es/cities.txt +10 -10
  16. data/data/europe/fr/cities.txt +11 -11
  17. data/data/europe/it/cities.txt +14 -14
  18. data/data/europe/lt/cities.txt +2 -3
  19. data/data/europe/lv/cities.txt +3 -2
  20. data/data/europe/nl/cities.txt +3 -3
  21. data/data/europe/pl/cities.txt +3 -3
  22. data/data/europe/ru/cities.txt +5 -5
  23. data/data/europe/sc/cities.txt +1 -1
  24. data/data/europe/ua/cities.txt +5 -5
  25. data/data/europe/wa/cities.txt +1 -1
  26. data/data/oceania/1_codes/fifa.yml +1 -1
  27. data/data/oceania/1_codes/internet.yml +1 -1
  28. data/data/oceania/1_codes/iso3.yml +1 -1
  29. data/data/oceania/3_more/en.wikipedia.yml +1 -1
  30. data/data/oceania/countries.txt +24 -15
  31. data/lib/worlddb/cli/runner.rb +2 -5
  32. data/lib/worlddb/reader.rb +91 -2
  33. data/lib/worlddb/readers/hash_reader.rb +23 -8
  34. data/lib/worlddb/readers/values_reader.rb +86 -8
  35. data/lib/worlddb/version.rb +1 -1
  36. data/lib/worlddb.rb +1 -13
  37. metadata +4 -6
  38. data/data/america/br/cities.txt +0 -9971
  39. data/lib/worlddb/loader.rb +0 -55
@@ -1,16 +1,16 @@
1
1
 
2
2
  ## top 10 cities
3
3
 
4
- madrid, Madrid, region:md, 3_265_038, m:5_427_000
5
- barcelona, Barcelona, region:ct, 1_615_448, m:4_223_000
6
- valencia, Valencia, region:vc, 798_033, m:810_000
7
- sevilla, Sevilla [Seville], region:an, 703_021, m:750_000
8
- zaragoza, Zaragoza, region:ar, 674_725
9
- malaga, Málaga, region:an, 568_030
10
- murcia, Murcia, region:mc, 442_203
11
- palma, Palma de Mallorca, region:ib, 405_318
12
- laspalmas, Las Palmas (de Gran Canaria), region:cn, 383_343
13
- bilbao, Bilbao, region:pv, 352_700, m:750_000
4
+ Madrid, region:md, 3_265_038, m:5_427_000
5
+ Barcelona, region:ct, 1_615_448, m:4_223_000
6
+ Valencia, region:vc, 798_033, m:810_000
7
+ Sevilla [Seville], region:an, 703_021, m:750_000
8
+ Zaragoza, region:ar, 674_725
9
+ Málaga, region:an, 568_030
10
+ Murcia, region:mc, 442_203
11
+ Palma (de Mallorca), region:ib, 405_318
12
+ Las Palmas (de Gran Canaria), region:cn, 383_343
13
+ Bilbao, region:pv, 352_700, m:750_000
14
14
 
15
15
 
16
16
  # more cities here
@@ -1,15 +1,15 @@
1
1
  # top 10 cities
2
2
 
3
- paris, Paris, region:if, 2_234_105, m:10_755_000
4
- marseille, Marseille, region:ac, 850_602, m: 1_582_000
5
- lyon, Lyon, region:ra, 479_803, m: 1_542_000
6
- toulouse, Toulouse, region:mp, 440_204, m: 880_000
7
- nice, Nice, region:ac, 340_735, m: 962_000
8
- nantes, Nantes, region:pl, 282_047
9
- strasbourg, Strasbourg, region:al, 271_708
10
- montpellier, Montpellier, region:lr, 255_080
11
- bordeaux, Bordeaux, region:aq, 236_725, m:845_000
12
- lille, Lille, region:nc, 226_827, m:1_050_000
13
- rennes, Rennes, region:br, 206_604
3
+ Paris, region:if, 2_234_105, m:10_755_000
4
+ Marseille, region:ac, 850_602, m: 1_582_000
5
+ Lyon, region:ra, 479_803, m: 1_542_000
6
+ Toulouse, region:mp, 440_204, m: 880_000
7
+ Nice, region:ac, 340_735, m: 962_000
8
+ Nantes, region:pl, 282_047
9
+ Strasbourg, region:al, 271_708
10
+ Montpellier, region:lr, 255_080
11
+ Bordeaux, region:aq, 236_725, m:845_000
12
+ Lille, region:nc, 226_827, m:1_050_000
13
+ Rennes, region:br, 206_604
14
14
 
15
15
  # more cities
@@ -1,17 +1,17 @@
1
1
 
2
2
 
3
- milano, Milano [Milan], 1_338_436, m:5_232_000
4
- roma, Roma [Rome], 2_777_979, m:3_799_000
5
- napoli, Napoli [Naples], 1_046_987, m:3_726_000
6
- torino, Torino [Turin], 921_485, m:1_499_000
7
- palermo, Palermo, 689_349, m:876_000
8
- genova, Genova [Genoa], 655_704
9
- bologna, Bologna, 385_813
10
- firenze, Firenze [Florence], 381_762, m:821_000
3
+ Milano [Milan], 1_338_436, m:5_232_000
4
+ Roma [Rome], 2_777_979, m:3_799_000
5
+ Napoli [Naples], 1_046_987, m:3_726_000
6
+ Torino [Turin], 921_485, m:1_499_000
7
+ Palermo, 689_349, m:876_000
8
+ Genova [Genoa], 655_704
9
+ Bologna, 385_813
10
+ Firenze [Florence], 381_762, m:821_000
11
11
 
12
- catania, Catania, 341_685
13
- bari, Bari, 335_647
14
- venezia, Venezia [Venice], 297_743
15
- messina, Messina, 262_524
16
- verona, Verona, 254_146
17
- trieste, Trieste, 222_589
12
+ Catania, 341_685
13
+ Bari, 335_647
14
+ Venezia [Venice], 297_743
15
+ Messina, 262_524
16
+ Verona, 254_146
17
+ Trieste, 222_589
@@ -1,4 +1,3 @@
1
- ## Latvia cities
2
-
3
- riga, Riga, 699_203, m:1_018_295
1
+ ## Lithuania Cities (lt)
4
2
 
3
+ Vilnius, 554_060, m:838_852
@@ -1,3 +1,4 @@
1
- ## Lithuania Cities
1
+ ## Latvia cities (lv)
2
+
3
+ Riga, 699_203, m:1_018_295
2
4
 
3
- vilnius, Vilnius, 554_060, m:838_852
@@ -1,11 +1,11 @@
1
1
 
2
2
  # metros
3
3
 
4
- rotterdamthehague, Rotterdam-The Hague, m:2_113_000
4
+ Rotterdam-The Hague, m:2_113_000
5
5
 
6
6
 
7
7
  # cities
8
8
 
9
- amsterdam, Amsterdam, m:1_050_000
9
+ Amsterdam, m:1_050_000
10
10
 
11
- alkmaar, Alkmaar, ## region: North Holland
11
+ Alkmaar ## region: North Holland
@@ -6,7 +6,7 @@ gdansktri, Gdańsk (Tricity), m:775_000, metro # includes Gdańsk, Gdynia
6
6
 
7
7
  # cities
8
8
 
9
- warszawa, Warszawa [Warsaw], 1_720_398, m:1_713_000
10
- lodz, Łódź, m: 907_000
11
- krakow, Kraków, m: 760_000
9
+ Warszawa [Warsaw], 1_720_398, m:1_713_000
10
+ Łódź, m: 907_000
11
+ Kraków, m: 760_000
12
12
 
@@ -1,11 +1,11 @@
1
1
 
2
- moskva, Moskva [Moscow], 11_689_048, m:15_512_000
3
- stpetersburg, St. Petersburg, 4_879_566, m:4_879_000
2
+ Moskva [Moscow], 11_689_048, m:15_512_000
3
+ St. Petersburg, 4_879_566, m:4_879_000
4
4
 
5
- novgorod, Nizhni Novgorod, 1_250_615, m:1_248_000
5
+ Novgorod|Nizhni Novgorod, 1_250_615, m:1_248_000
6
6
 
7
- samara, Samaram, 1_164_896, m:1_163_000
8
- kazan, Kazan, 1_143_546, m:1_141_000
7
+ Samaram, 1_164_896, m:1_163_000
8
+ Kazan, 1_143_546, m:1_141_000
9
9
 
10
10
  # Yekaterinburg 1_350_136 ??
11
11
  # Chelyabinsk 1_130_273 ??
@@ -1,3 +1,3 @@
1
1
 
2
- glasgow, Glasgow, m:1_201_000
2
+ Glasgow, m:1_201_000
3
3
 
@@ -1,9 +1,9 @@
1
1
 
2
- kiev, Kiew|Kiev|Kyiv, 2_785_100, m:2_812_000
3
- kharkov, Kharkiv|Kharkov, 1_470_000, m:1_449_000
4
- donetsk, Donezk|Donetsk, m:963_000
2
+ Kiev|Kiew|Kyiv, 2_785_100, m:2_812_000
3
+ Kharkov|Kharkiv, 1_470_000, m:1_449_000
4
+ Donetsk|Donezk, m:963_000
5
5
 
6
6
 
7
- dnepropetrovsk, Dnepropetrovsk, 1_007_200, m:1_000_000
8
- odessa, Odessa, 1_003_705, m:1_010_000
7
+ Dnepropetrovsk, 1_007_200, m:1_000_000
8
+ Odessa, 1_003_705, m:1_010_000
9
9
 
@@ -1,3 +1,3 @@
1
1
 
2
- swansea, Swansea, 239000
2
+ Swansea, 239000
3
3
 
@@ -13,7 +13,7 @@ pg: PNG # Papua New Guinea
13
13
  ####
14
14
  ## not members - double check if mistaken
15
15
 
16
- kl: !!null # Kiribati
16
+ ki: !!null # Kiribati
17
17
  mh: !!null # Marshall Islands
18
18
  fm: !!null # Micronesia
19
19
  nr: !!null # Nauru
@@ -2,7 +2,7 @@
2
2
  ## oceania
3
3
 
4
4
  fj: fj # Fiji
5
- kl: kl # Kiribati
5
+ ki: ki # Kiribati
6
6
  mh: mh # Marshall Islands
7
7
  fm: fm # Micronesia
8
8
  nr: nr # Nauru
@@ -2,7 +2,7 @@
2
2
  ## oceania
3
3
 
4
4
  fj: FJI # Fiji
5
- kl: KIR # Kiribati
5
+ ki: KIR # Kiribati
6
6
  mh: MHL # Marshall Islands
7
7
  fm: FSM # Micronesia
8
8
  nr: NRU # Nauru
@@ -1,5 +1,5 @@
1
1
 
2
- kl: Kiribati
2
+ ki: Kiribati
3
3
  mh: Marshall_Islands
4
4
  fm: Federated_States_of_Micronesia
5
5
  nr: Nauru
@@ -8,24 +8,33 @@
8
8
  ## - Polynesia
9
9
 
10
10
 
11
- fj, Fiji, FIJ, 18_274, 849_000, Suva, un|fifa
12
- kl, Kiribati, KIR, 811, 103_500, Tarawa, un # nb: check code; NOT fifa member
13
- mh, Marshall Islands, MHI, 181, 68_000, Majuro, un # nb: check code; NOT fifa member
14
- fm, Micronesia, FSM, 702, 111_000, Palikir, un
15
- nr, Nauru, NAU, 21, 9_378, Yaren, un
16
- pw, Palau, PWA, 459, 20_956, Ngerulmud, un # check code
17
- ws, Samoa, SAM, 2_831, 194_320, Apia, un|fifa
18
- sb, Solomon Islands, SOL, 28_400, 523_000, Honiara, un|fifa
19
- to, Tonga, TGA, 748, 103_036, Nuku'alofa, un|fifa
20
- tv, Tuvalu, TUV, 26, 10_544, Funafuti, un
21
- vu, Vanuatu, VAN, 12_190, 224_564, Port Vila, un|fifa
11
+ au, Australia, AUS, 7_686_850, 22_028_000, un|fifa|en|g20|commonwealth
22
12
 
13
+ ##############
14
+ ### Melanesia
15
+ ## see http://en.wikipedia.org/wiki/Melanesia
16
+
17
+ pg, Papua New Guinea, PNG, 462_840, 5_172_033, un|fifa|melanesia|commonwealth
18
+ fj, Fiji, FIJ, 18_274, 849_000, Suva, un|fifa|melanesia|commonwealth
19
+ sb, Solomon Islands, SOL, 28_400, 523_000, Honiara, un|fifa|melanesia|commonwealth
20
+ vu, Vanuatu, VAN, 12_190, 224_564, Port Vila, un|fifa|melanesia|commonwealth
23
21
 
24
22
 
25
- au, Australia, AUS, 7_686_850, 22_028_000, un|fifa|en|g20
26
- nz, New Zealand, NZL, 268_680, 4_108_037, un|fifa|en
23
+ #############
24
+ ## Micronesia
25
+ ## see http://en.wikipedia.org/wiki/Micronesia
26
+
27
+ pw, Palau, PLW, 459, 20_956, Ngerulmud, un|micronesia|microstate # nb: use ISO code; NOT fifa member
28
+ fm, Micronesia, FSM, 702, 111_000, Palikir, un|micronesia|microstate # nb: use ISO code; NOT fifa member
29
+ mh, Marshall Islands, MHL, 181, 68_000, Majuro, un|micronesia|microstate # nb: use ISO code; NOT fifa member
30
+ nr, Nauru, NRU, 21, 9_378, Yaren, un|micronesia|microstate|commonwealth # nb: use ISO code; NOT fifa member
31
+ ki, Kiribati, KIR, 811, 103_500, Tarawa, un|micronesia|microstate|commonwealth # nb: use ISO code; NOT fifa member
27
32
 
28
33
  ##############
29
- ### Melanesia
34
+ ## Polynesia
35
+ ## see http://en.wikipedia.org/wiki/Polynesia
30
36
 
31
- pg, Papua New Guinea, PNG, 462_840, 5_172_033, un|fifa
37
+ nz, New Zealand, NZL, 268_680, 4_108_037, un|fifa|en|polynesia|commonwealth
38
+ ws, Samoa, SAM, 2_831, 194_320, Apia, un|fifa|polynesia|commonwealth
39
+ to, Tonga, TGA, 748, 103_036, Nuku'alofa, un|fifa|polynesia|microstate|commonwealth
40
+ tv, Tuvalu, TUV, 26, 10_544, Funafuti, un|polynesia|microstate|commonwealth # nb: use ISO code; NOT fifa member
@@ -99,11 +99,8 @@ EOS
99
99
  WorldDB.create if opts.create?
100
100
  WorldDB.delete! if opts.delete?
101
101
 
102
- if opts.countries? || opts.regions? || opts.cities?
103
- Reader.new( logger ).run( opts, args ) # load/read plain text country/region/city fixtures
104
- else
105
- Loader.new( logger ).run( opts, args ) # load ruby fixtures
106
- end
102
+ # read plain text country/region/city fixtures
103
+ Reader.new( logger ).run( opts, args )
107
104
  end
108
105
 
109
106
  WorldDB.stats
@@ -146,6 +146,7 @@ private
146
146
 
147
147
  value_numbers = []
148
148
  value_tag_keys = []
149
+ value_cities = []
149
150
 
150
151
  ### check for "default" tags - that is, if present attribs[:tags] remove from hash
151
152
 
@@ -183,6 +184,8 @@ private
183
184
  elsif value =~ /^supra$/ ## supra(national)
184
185
  attribs[ :c ] = false # turn off default c|country flag; make it s|supra only
185
186
  attribs[ :s ] = true
187
+ ## auto-add tag supra
188
+ value_tag_keys << 'supra'
186
189
  elsif value =~ /^supra:/ ## supra:
187
190
  value_country_key = value[6..-1] ## cut off supra: prefix
188
191
  value_country = Country.find_by_key!( value_country_key )
@@ -192,7 +195,9 @@ private
192
195
  value_country = Country.find_by_key!( value_country_key )
193
196
  attribs[ :country_id ] = value_country.id
194
197
  attribs[ :c ] = false # turn off default c|country flag; make it d|depend only
195
- attribs[ :d ] = true
198
+ attribs[ :d ] = true
199
+ ## auto-add tag territory
200
+ value_tag_keys << 'territory' # rename tag to dependency? why? why not?
196
201
  elsif value =~ /^metro:/ ## metro:
197
202
  value_city_key = value[6..-1] ## cut off metro: prefix
198
203
  value_city = City.find_by_key!( value_city_key )
@@ -227,10 +232,18 @@ private
227
232
 
228
233
  value_tag_keys += tag_keys
229
234
  else
235
+
236
+ if clazz == Country || clazz == Region
237
+ ### assume it is the capital city - mark it for auto add
238
+ value_cities << value
239
+ next
240
+ end
241
+
230
242
  # issue warning: unknown type for value
231
243
  puts "!!!! >>>> warning: unknown type for value >#{value}<"
232
244
  end
233
- end
245
+ end # each value
246
+
234
247
 
235
248
  if value_numbers.size > 0
236
249
  if clazz == City
@@ -239,6 +252,43 @@ private
239
252
  else # countries,regions
240
253
  attribs[ :area ] = value_numbers[0]
241
254
  attribs[ :pop ] = value_numbers[1]
255
+
256
+ if clazz == Country
257
+ # auto-add tags
258
+ area = value_numbers[0]
259
+ pop = value_numbers[1]
260
+
261
+ # categorize into brackets
262
+ if area >= 1_000_000
263
+ value_tag_keys << 'area_1_000_000_n_up'
264
+ elsif area >= 100_000
265
+ value_tag_keys << 'area_1_000_000_n_100_000'
266
+ elsif area >= 1000
267
+ value_tag_keys << 'area_100_000_n_1_000'
268
+ else
269
+ value_tag_keys << 'area_1_000_n_less' # microstate
270
+ end
271
+
272
+ # include all
273
+ value_tag_keys << 'area_100_000_n_up' if area >= 100_000
274
+ value_tag_keys << 'area_1_000_n_up' if area >= 1_000
275
+
276
+
277
+ # categorize into brackets
278
+ if pop >= 100_000_000
279
+ value_tag_keys << 'pop_100m_n_up'
280
+ elsif pop >= 10_000_000
281
+ value_tag_keys << 'pop_100m_n_10m'
282
+ elsif pop >= 1_000_000
283
+ value_tag_keys << 'pop_10m_n_1m'
284
+ else
285
+ value_tag_keys << 'pop_1m_n_less'
286
+ end
287
+
288
+ # include all
289
+ value_tag_keys << 'pop_10m_n_up' if pop >= 10_000_000
290
+ value_tag_keys << 'pop_1m_n_up' if pop >= 1_000_000
291
+ end
242
292
  end
243
293
  end
244
294
 
@@ -262,6 +312,45 @@ private
262
312
  puts attribs.to_json
263
313
 
264
314
  rec.update_attributes!( attribs )
315
+
316
+ #################
317
+ ## auto add capital cities
318
+
319
+ value_cities.each do |city_title|
320
+
321
+ city_attribs = {}
322
+ city_key = reader.title_to_key( city_title )
323
+
324
+ ## check if it exists
325
+ ## todo/fix: add country_id for lookup?
326
+ city = City.find_by_key( city_key )
327
+ if city.present?
328
+ puts "*** update city #{city.id}-#{city.key}:"
329
+ else
330
+ puts "*** create city:"
331
+ city = City.new
332
+ city_attribs[ :key ] = city_key
333
+ end
334
+
335
+ city_attribs[ :title ] = city_title
336
+
337
+ if clazz == Country
338
+ city_attribs[ :country_id ] = rec.id
339
+ elsif clazz == Region
340
+ city_attribs[ :region_id ] = rec.id
341
+ city_attribs[ :country_id ] = rec.country_id
342
+ else
343
+ ## issue warning: unknown type for city!!!
344
+ end
345
+
346
+ puts city_attribs.to_json
347
+
348
+ city.update_attributes!( city_attribs )
349
+
350
+ ### todo/fix: add capital ref to country/region
351
+
352
+ end # each city
353
+
265
354
 
266
355
  ##################
267
356
  ## add taggings
@@ -17,11 +17,33 @@ class HashReader
17
17
  ## - see worlddb/utils.rb
18
18
 
19
19
  text = File.read_utf8( @path )
20
+
20
21
  ### hack for syck yaml parser (e.g.ruby 1.9.2) (cannot handle !!null)
21
22
  ## change it to !null to get plain nil
22
23
  ## w/ both syck and psych/libyml
23
-
24
+
24
25
  text = text.gsub( '!!null', '!null' )
26
+
27
+ ### hacks for yaml
28
+
29
+ ### see yaml gotchas
30
+ ## - http://www.perlmonks.org/?node_id=738671
31
+ ## -
32
+
33
+ ## replace all tabs w/ two spaces and issue a warning
34
+ ## nb: yaml does NOT support tabs see why here -> yaml.org/faq.html
35
+
36
+ text = text.gsub( "\t" ) do |_|
37
+ puts "*** warn: hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html"
38
+ ' ' # replace w/ two spaces
39
+ end
40
+
41
+ ## quote implicit boolean types on,no,n,y
42
+
43
+ text = text.gsub( /\b(ON|On|on|NO|No|no|N|n|Y|y)\b/ ) do |value|
44
+ puts "*** warn: hash reader - found implicit bool (#{$1}); adding quotes to turn into string; see yaml.org/refcard.html"
45
+ "'#{$1}'" # add quotes to turn it into a string (not bool e.g. true|false)
46
+ end
25
47
 
26
48
  @hash = YAML.load( text )
27
49
  end
@@ -36,13 +58,6 @@ class HashReader
36
58
  key = key_wild.to_s.strip
37
59
  value = value_wild.to_s.strip
38
60
 
39
- ### hack - hack - hack -change
40
- ## no: in yml becomes false !!! check how to escape!
41
-
42
- key = 'no' if key == 'false'
43
- value = 'no' if value == 'false'
44
- ### todo: issue warnings
45
-
46
61
  puts ">>#{key}<< >>#{value}<<"
47
62
 
48
63
  yield( key, value )
@@ -68,26 +68,104 @@ class ValuesReader
68
68
  end
69
69
 
70
70
  puts " values: >>#{values.join('<< >>')}<<"
71
-
72
- attribs = {
73
- key: values[0]
74
- }
75
71
 
72
+
73
+ ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
74
+ ## either use keys or do NOT use keys; do NOT mix in a single fixture file
75
+
76
+
77
+ ### support autogenerate key from first title value
78
+ if values[0] =~ /^[a-z]{2,}$/ # if it looks like a key (only a-z lower case allowed); assume it's a key
79
+ key_col = values[0]
80
+ title_col = values[1]
81
+ more_cols = values[2..-1]
82
+ else
83
+ key_col = '<auto>'
84
+ title_col = values[0]
85
+ more_cols = values[1..-1]
86
+ end
87
+
88
+ attribs = {}
89
+
76
90
  ## title (split of optional synonyms)
77
91
  # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
78
- titles = values[1].split('|')
92
+ titles = title_col.split('|')
79
93
 
80
94
  attribs[ :title ] = titles[0]
81
- ## add optional synonyms
95
+
96
+ ## add optional synonyms if present
82
97
  attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
83
98
 
99
+ if key_col == '<auto>'
100
+ ## autogenerate key from first title
101
+ key_col = title_to_key( titles[0] )
102
+ puts " autogen key >#{key_col}< from title >#{titles[0]}<"
103
+ end
104
+
105
+ attribs[ :key ] = key_col
106
+
84
107
  attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
85
108
 
86
- yield( attribs, values[2..-1] )
109
+ yield( attribs, more_cols )
87
110
 
88
111
  end # each lines
89
112
 
90
113
  end # method each_line
91
114
 
92
- end # class ValuesReader
115
+
116
+
117
+ def title_to_key( title )
93
118
 
119
+ ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
120
+ key = title.downcase
121
+
122
+ ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
123
+ key = key.gsub( /\[.+\]/, '' )
124
+
125
+ ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
126
+ key = key.gsub( /\(.+\)/, '' )
127
+
128
+ ## remove all whitespace and punctuation
129
+ key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
130
+
131
+ ## turn accented char into ascii look alike if possible
132
+ ##
133
+ ## todo: add some more
134
+ ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
135
+
136
+ alternatives = [
137
+ ['ß', 'ss'],
138
+ ['æ', 'ae'],
139
+ ['ä', 'ae'],
140
+ ['á', 'a' ], # e.g. Bogotá, Králové
141
+ ['ã', 'a' ], # e.g São Paulo
142
+ ['ă', 'a' ], # e.g. Chișinău
143
+ ['é', 'e' ], # e.g. Vélez, Králové
144
+ ['è', 'e' ], # e.g. Rivières
145
+ ['ê', 'e' ], # e.g. Grêmio
146
+ ['ě', 'e' ], # e.g. Budějovice
147
+ ['ì', 'i' ], # e.g. Potosì
148
+ ['í', 'i' ], # e.g. Ústí
149
+ ['ñ', 'n' ], # e.g. Porteño
150
+ ['ň', 'n' ], # e.g. Plzeň, Třeboň
151
+ ['ö', 'oe'],
152
+ ['ó', 'o' ], # e.g. Colón, Łódź, Kraków
153
+ ['ř', 'r' ], # e.g. Třeboň
154
+ ['ș', 's' ], # e.g. Chișinău
155
+ ['ü', 'ue'],
156
+ ['ú', 'u' ], # e.g. Fútbol
157
+ ['ź', 'z' ], # e.g. Łódź
158
+ ['Č', 'c' ], # e.g. České
159
+ ['Ł', 'l' ], # e.g. Łódź
160
+ ['Ú', 'u' ], # e.g. Ústí
161
+ ]
162
+
163
+ alternatives.each do |alt|
164
+ key = key.gsub( alt[0], alt[1] )
165
+ end
166
+
167
+ key
168
+ end # method title_to_key
169
+
170
+
171
+ end # class ValuesReader
@@ -1,5 +1,5 @@
1
1
 
2
2
  module WorldDB
3
- VERSION = '0.7.0'
3
+ VERSION = '0.7.1'
4
4
  end
5
5
 
data/lib/worlddb.rb CHANGED
@@ -35,7 +35,6 @@ require 'worlddb/readers/line_reader'
35
35
  require 'worlddb/readers/values_reader'
36
36
  require 'worlddb/readers/hash_reader'
37
37
  require 'worlddb/reader'
38
- require 'worlddb/loader'
39
38
  require 'worlddb/cli/opts'
40
39
  require 'worlddb/cli/runner'
41
40
 
@@ -62,16 +61,6 @@ module WorldDB
62
61
  CreateDB.up
63
62
  end
64
63
 
65
- # load built-in (that is, bundled within the gem) named seeds
66
- # - pass in an array of seed names e.g. [ 'countries', 'at/cities', 'de/cities' ] etc.
67
-
68
- def self.load( ary )
69
- loader = Loader.new
70
- ary.each do |name|
71
- loader.load_fixtures_builtin( name )
72
- end
73
- end
74
-
75
64
  def self.fixtures # all builtin fixtures; helper for covenience
76
65
  africa_fixtures +
77
66
  america_fixtures +
@@ -94,7 +83,6 @@ module WorldDB
94
83
  '1_codes/iso3',
95
84
  '1_codes/motor',
96
85
  'br/regions',
97
- 'br/cities',
98
86
  'ca/regions',
99
87
  'ca/cities',
100
88
  'mx/regions',
@@ -169,6 +157,7 @@ module WorldDB
169
157
  'au/cities'].map { |path| "oceania/#{path}" }
170
158
  end
171
159
 
160
+ ## todo/fix: rename to load/load_all - why? why not?? or just add an alias?
172
161
 
173
162
  def self.read( ary )
174
163
  reader = Reader.new
@@ -183,7 +172,6 @@ module WorldDB
183
172
  # too big for heroku free db plan (10,000 record limit)
184
173
  # - sorry, can't load by default
185
174
  fixture_excludes = [
186
- 'america/br/cities',
187
175
  'america/ve/cities'
188
176
  ]
189
177