worlddb 0.7.1 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +10 -1
- data/Rakefile +3 -1
- data/data/africa/3_more/lang.yml +63 -0
- data/data/africa/countries.txt +5 -5
- data/data/america/3_more/lang.yml +60 -0
- data/data/america/countries.txt +26 -26
- data/data/asia/3_more/lang.yml +55 -0
- data/data/europe/3_more/lang.yml +47 -0
- data/data/europe/countries.txt +9 -9
- data/data/langs.yml +210 -0
- data/data/oceania/3_more/lang.yml +17 -0
- data/data/oceania/countries.txt +2 -2
- data/data/tags.1.yml +40 -0
- data/data/tags.3.yml +23 -0
- data/lib/worlddb/models/country.rb +3 -0
- data/lib/worlddb/models/lang.rb +15 -0
- data/lib/worlddb/models/tag.rb +6 -2
- data/lib/worlddb/models/usage.rb +12 -0
- data/lib/worlddb/reader.rb +145 -5
- data/lib/worlddb/readers/hash_reader.rb +19 -5
- data/lib/worlddb/schema.rb +17 -1
- data/lib/worlddb/version.rb +1 -1
- data/lib/worlddb.rb +42 -15
- metadata +15 -6
- data/data/tags.yml +0 -17
data/Manifest.txt
CHANGED
@@ -8,6 +8,7 @@ data/africa/1_codes/internet.yml
|
|
8
8
|
data/africa/1_codes/iso3.yml
|
9
9
|
data/africa/2_names/de.yml
|
10
10
|
data/africa/3_more/en.wikipedia.yml
|
11
|
+
data/africa/3_more/lang.yml
|
11
12
|
data/africa/countries.txt
|
12
13
|
data/america/1_codes/fifa.yml
|
13
14
|
data/america/1_codes/internet.yml
|
@@ -16,6 +17,7 @@ data/america/1_codes/motor.yml
|
|
16
17
|
data/america/2_names/de.yml
|
17
18
|
data/america/2_names/es.yml
|
18
19
|
data/america/3_more/en.wikipedia.yml
|
20
|
+
data/america/3_more/lang.yml
|
19
21
|
data/america/br/regions.txt
|
20
22
|
data/america/ca/cities.txt
|
21
23
|
data/america/ca/regions.txt
|
@@ -31,6 +33,7 @@ data/asia/1_codes/internet.yml
|
|
31
33
|
data/asia/1_codes/iso3.yml
|
32
34
|
data/asia/2_names/de.yml
|
33
35
|
data/asia/3_more/en.wikipedia.yml
|
36
|
+
data/asia/3_more/lang.yml
|
34
37
|
data/asia/countries.txt
|
35
38
|
data/asia/jp/cities.txt
|
36
39
|
data/europe/1_codes/fifa.yml
|
@@ -40,6 +43,7 @@ data/europe/1_codes/motor.yml
|
|
40
43
|
data/europe/2_names/de.yml
|
41
44
|
data/europe/2_names/es.yml
|
42
45
|
data/europe/3_more/en.wikipedia.yml
|
46
|
+
data/europe/3_more/lang.yml
|
43
47
|
data/europe/at/cities.txt
|
44
48
|
data/europe/at/regions.txt
|
45
49
|
data/europe/be/cities.txt
|
@@ -81,24 +85,29 @@ data/europe/se/cities.txt
|
|
81
85
|
data/europe/tr/cities.txt
|
82
86
|
data/europe/ua/cities.txt
|
83
87
|
data/europe/wa/cities.txt
|
88
|
+
data/langs.yml
|
84
89
|
data/oceania/1_codes/fifa.yml
|
85
90
|
data/oceania/1_codes/internet.yml
|
86
91
|
data/oceania/1_codes/iso3.yml
|
87
92
|
data/oceania/2_names/de.yml
|
88
93
|
data/oceania/3_more/en.wikipedia.yml
|
94
|
+
data/oceania/3_more/lang.yml
|
89
95
|
data/oceania/au/cities.txt
|
90
96
|
data/oceania/countries.txt
|
91
|
-
data/tags.yml
|
97
|
+
data/tags.1.yml
|
98
|
+
data/tags.3.yml
|
92
99
|
lib/worlddb.rb
|
93
100
|
lib/worlddb/cli/opts.rb
|
94
101
|
lib/worlddb/cli/runner.rb
|
95
102
|
lib/worlddb/console.rb
|
96
103
|
lib/worlddb/models/city.rb
|
97
104
|
lib/worlddb/models/country.rb
|
105
|
+
lib/worlddb/models/lang.rb
|
98
106
|
lib/worlddb/models/prop.rb
|
99
107
|
lib/worlddb/models/region.rb
|
100
108
|
lib/worlddb/models/tag.rb
|
101
109
|
lib/worlddb/models/tagging.rb
|
110
|
+
lib/worlddb/models/usage.rb
|
102
111
|
lib/worlddb/reader.rb
|
103
112
|
lib/worlddb/readers/code_reader.rb
|
104
113
|
lib/worlddb/readers/hash_reader.rb
|
data/Rakefile
CHANGED
@@ -77,7 +77,9 @@ namespace :dev do
|
|
77
77
|
|
78
78
|
|
79
79
|
desc 'worlddb - test loading of builtin fixtures'
|
80
|
-
task :test => [:clean, :create, :import]
|
80
|
+
task :test => [:clean, :create, :import]
|
81
81
|
|
82
|
+
desc 'worlddb - test loading of builtin fixtures (update)'
|
83
|
+
task :update => [:import]
|
82
84
|
|
83
85
|
end # namespace :dev
|
@@ -0,0 +1,63 @@
|
|
1
|
+
### official language mapping
|
2
|
+
## country code: language codes
|
3
|
+
|
4
|
+
ao: pt
|
5
|
+
bf: fr
|
6
|
+
bi: fr
|
7
|
+
bj: fr
|
8
|
+
bt: dz
|
9
|
+
bw: en,tn
|
10
|
+
cd: fr
|
11
|
+
cf: fr
|
12
|
+
cg: fr
|
13
|
+
ci: fr
|
14
|
+
cm: fr,en
|
15
|
+
cv: pt
|
16
|
+
dj: fr,ar,so
|
17
|
+
dz: ar
|
18
|
+
eg: ar
|
19
|
+
er: ti,ar,en
|
20
|
+
et: am,om
|
21
|
+
ga: fr
|
22
|
+
gh: en
|
23
|
+
gm: en
|
24
|
+
gn: fr
|
25
|
+
gq: es,fr,pt
|
26
|
+
gw: pt
|
27
|
+
ke: sw,en
|
28
|
+
km: ar,fr
|
29
|
+
ls: en,st
|
30
|
+
ly: ar
|
31
|
+
ma: ar
|
32
|
+
mg: mg,fr
|
33
|
+
ml: fr
|
34
|
+
mr: ar,fr
|
35
|
+
mu: fr,en # mfe
|
36
|
+
mw: en,ny
|
37
|
+
mz: pt
|
38
|
+
|
39
|
+
### todo: check sf - no iso code found -- check again and add it if exists to na
|
40
|
+
na: en,de
|
41
|
+
ne: fr
|
42
|
+
ng: en
|
43
|
+
rw: rw,fr,en
|
44
|
+
sc: fr,en # crs
|
45
|
+
sd: ar,en
|
46
|
+
sl: en
|
47
|
+
sn: fr
|
48
|
+
so: so,ar
|
49
|
+
st: pt
|
50
|
+
ss: en
|
51
|
+
sy: ar
|
52
|
+
sz: en,ss
|
53
|
+
td: fr,ar
|
54
|
+
tg: fr
|
55
|
+
tn: ar
|
56
|
+
to: en
|
57
|
+
tz: sw,en
|
58
|
+
ug: en,sw
|
59
|
+
za: zu,xh,af,st,tn,en
|
60
|
+
zm: en
|
61
|
+
zw: en,sn,nd
|
62
|
+
|
63
|
+
|
data/data/africa/countries.txt
CHANGED
@@ -52,11 +52,11 @@ st, São Tomé and Príncipe, STP, 1_001, 183_176, São Tomé, un|fifa|centr
|
|
52
52
|
###############
|
53
53
|
## Northern Africa
|
54
54
|
|
55
|
-
dz, Algeria, ALG, 2_381_740, 34_178_188, un|fifa|
|
56
|
-
eg, Egypt, EGY, 1_001_450, 82_868_000, un|fifa|
|
57
|
-
ma, Morocco, MAR, 446_550, 34_859_364, un|fifa|
|
58
|
-
ly, Libya, LBY, 1_759_540, 6_310_434, un|fifa|
|
59
|
-
tn, Tunisia, TUN, 163_610, 10_486_339, un|fifa|
|
55
|
+
dz, Algeria, ALG, 2_381_740, 34_178_188, un|fifa|northern africa
|
56
|
+
eg, Egypt, EGY, 1_001_450, 82_868_000, un|fifa|northern africa|middle_east
|
57
|
+
ma, Morocco, MAR, 446_550, 34_859_364, un|fifa|northern africa
|
58
|
+
ly, Libya, LBY, 1_759_540, 6_310_434, un|fifa|northern africa
|
59
|
+
tn, Tunisia, TUN, 163_610, 10_486_339, un|fifa|northern africa
|
60
60
|
ss, South Sudan, SSD, 619_745, 8_260_490, Juba, un|fifa|northern africa
|
61
61
|
sd, Sudan, SDN, 1_886_068, 30_894_000, Khartoum, un|fifa|northern africa
|
62
62
|
|
@@ -0,0 +1,60 @@
|
|
1
|
+
### official language mapping
|
2
|
+
## country code: language codes
|
3
|
+
|
4
|
+
## north america
|
5
|
+
|
6
|
+
ca: en,fr
|
7
|
+
us: en,es
|
8
|
+
mx: es
|
9
|
+
|
10
|
+
### todo: group by regions
|
11
|
+
## central america
|
12
|
+
|
13
|
+
|
14
|
+
## caribbean
|
15
|
+
|
16
|
+
ag: en
|
17
|
+
|
18
|
+
|
19
|
+
## south america
|
20
|
+
|
21
|
+
ar: es
|
22
|
+
ve: es
|
23
|
+
br: pt
|
24
|
+
bo: es,qu,ay
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
tt: en
|
29
|
+
uy: es
|
30
|
+
vc: en
|
31
|
+
bb: en
|
32
|
+
bs: en
|
33
|
+
bz: en
|
34
|
+
cl: es
|
35
|
+
co: es
|
36
|
+
cr: es
|
37
|
+
cu: es
|
38
|
+
dm: en
|
39
|
+
do: es
|
40
|
+
ec: es
|
41
|
+
gd: en
|
42
|
+
gt: es
|
43
|
+
gy: en
|
44
|
+
hn: es
|
45
|
+
ht: fr,ht
|
46
|
+
jm: en
|
47
|
+
kn: en
|
48
|
+
lc: en
|
49
|
+
ni: es
|
50
|
+
pa: es
|
51
|
+
pe: es
|
52
|
+
pr: es,en
|
53
|
+
py: es,gn
|
54
|
+
sr: nl
|
55
|
+
sv: es
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
|
data/data/america/countries.txt
CHANGED
@@ -15,26 +15,26 @@
|
|
15
15
|
|
16
16
|
# area (in sq km), pop(ulation)
|
17
17
|
|
18
|
-
ca, Canada, CAN, 9_984_670, 34_278_406,
|
19
|
-
us, United States, USA, 9_629_091, 314_167_157,
|
20
|
-
mx, México [Mexico], MEX, 1_972_550, 112_322_757,
|
18
|
+
ca, Canada, CAN, 9_984_670, 34_278_406, un|fifa|north america|g8|g20|commonwealth
|
19
|
+
us, United States, USA, 9_629_091, 314_167_157, un|fifa|north america|g8|g20
|
20
|
+
mx, México [Mexico], MEX, 1_972_550, 112_322_757, un|fifa|north america|g5|g20
|
21
21
|
|
22
22
|
#####################
|
23
23
|
## central america & caribbean islands
|
24
24
|
|
25
|
-
bz, Belize, BLZ, 22_966, 307_000, un|fifa|
|
26
|
-
gt, Guatemala, GUA, 108_889, 14_027_000, un|fifa|
|
27
|
-
cr, Costa Rica, CRC, 51_100, 4_579_000, un|fifa|
|
28
|
-
hn, Honduras, HON, 112_492, 7_466_000, un|fifa|
|
29
|
-
sv, El Salvador, SLV, 21_041, 6_163_000, un|fifa|
|
30
|
-
ni, Nicaragua, NCA, 130_373, 5_743_000, un|fifa|
|
31
|
-
pa, Panamá [Panama], PAN, 75_417, 3_454_000, un|fifa|
|
25
|
+
bz, Belize, BLZ, 22_966, 307_000, un|fifa|central_america|commonwealth
|
26
|
+
gt, Guatemala, GUA, 108_889, 14_027_000, un|fifa|central_america
|
27
|
+
cr, Costa Rica, CRC, 51_100, 4_579_000, un|fifa|central_america
|
28
|
+
hn, Honduras, HON, 112_492, 7_466_000, un|fifa|central_america
|
29
|
+
sv, El Salvador, SLV, 21_041, 6_163_000, un|fifa|central_america
|
30
|
+
ni, Nicaragua, NCA, 130_373, 5_743_000, un|fifa|central_america
|
31
|
+
pa, Panamá [Panama], PAN, 75_417, 3_454_000, un|fifa|central_america
|
32
32
|
|
33
33
|
|
34
|
-
cu, Cuba, CUB, 109_886, 11_204_000, un|fifa|
|
35
|
-
jm, Jamaica, JAM, 10_991, 2_719_000, un|fifa|
|
36
|
-
pr, Puerto Rico, PUR, 8_870, 3_982_000, country:us, fifa|
|
37
|
-
do, Dominican Republic, DOM, 48_671, 10_090_000, un|fifa|
|
34
|
+
cu, Cuba, CUB, 109_886, 11_204_000, un|fifa|caribbean
|
35
|
+
jm, Jamaica, JAM, 10_991, 2_719_000, un|fifa|caribbean|commonwealth
|
36
|
+
pr, Puerto Rico, PUR, 8_870, 3_982_000, country:us, fifa|caribbean
|
37
|
+
do, Dominican Republic, DOM, 48_671, 10_090_000, un|fifa|caribbean
|
38
38
|
ht, Haiti, HAI, 27_750, 9_719_932, Port-au-Prince, un|fifa|caribbean
|
39
39
|
|
40
40
|
ag, Antigua and Barbuda, ATG, 440, 81_799, Saint John's, un|fifa|caribbean|microstate|commonwealth
|
@@ -56,15 +56,15 @@ tt, Trinidad and Tobago, TRI, 5_131, 1_346_350, Port of Spain, un|fifa|car
|
|
56
56
|
## - southern cone - see http://en.wikipedia.org/wiki/Southern_Cone
|
57
57
|
## - andean states - see http://en.wikipedia.org/wiki/Andean_states
|
58
58
|
|
59
|
-
ar, Argentina, ARG, 2_780_400, 40_518_425, un|fifa|
|
60
|
-
bo, Bolivia, BOL, 1_098_581, 10_907_778, un|fifa|
|
61
|
-
br, Brazil, BRA, 8_514_215, 192_380_000, un|fifa|
|
62
|
-
cl, Chile, CHI, 755_696, 16_763_470, un|fifa|south america|
|
63
|
-
co, Colombia, COL, 1_138_748, 46_413_791, un|fifa|south america|
|
64
|
-
ec, Ecuador, ECU, 258_238, 15_007_343, un|fifa|south america|
|
65
|
-
gy, Guyana, GUY, 214_970, 752_940, un|fifa|south america|
|
66
|
-
pe, Peru, PER, 1_285_220, 29_546_963, un|fifa|south america|
|
67
|
-
sr, Suriname, SUR, 163_820, 492_829, un|fifa|south america
|
68
|
-
py, Paraguay, PAR, 406_752, 6_375_830, un|fifa|south america|
|
69
|
-
uy, Uruguay, URU, 176_215, 3_510_386, un|fifa|south america|
|
70
|
-
ve, Venezuela, VEN, 916_445, 28_833_845, un|fifa|south america|
|
59
|
+
ar, Argentina, ARG, 2_780_400, 40_518_425, un|fifa|south america|g20|southern_cone
|
60
|
+
bo, Bolivia, BOL, 1_098_581, 10_907_778, un|fifa|south america|andean_states
|
61
|
+
br, Brazil, BRA, 8_514_215, 192_380_000, un|fifa|south america|g5|g20|southern_cone
|
62
|
+
cl, Chile, CHI, 755_696, 16_763_470, un|fifa|south america|southern_cone
|
63
|
+
co, Colombia, COL, 1_138_748, 46_413_791, un|fifa|south america|andean_states
|
64
|
+
ec, Ecuador, ECU, 258_238, 15_007_343, un|fifa|south america|andean_states
|
65
|
+
gy, Guyana, GUY, 214_970, 752_940, un|fifa|south america|commonwealth
|
66
|
+
pe, Peru, PER, 1_285_220, 29_546_963, un|fifa|south america|andean_states
|
67
|
+
sr, Suriname, SUR, 163_820, 492_829, un|fifa|south america
|
68
|
+
py, Paraguay, PAR, 406_752, 6_375_830, un|fifa|south america|southern_cone
|
69
|
+
uy, Uruguay, URU, 176_215, 3_510_386, un|fifa|south america|southern_cone
|
70
|
+
ve, Venezuela, VEN, 916_445, 28_833_845, un|fifa|south america|andean_states ## add to andean too - why? why not?
|
@@ -0,0 +1,55 @@
|
|
1
|
+
### official language mapping
|
2
|
+
## country code: language codes
|
3
|
+
|
4
|
+
ae: ar
|
5
|
+
af: fa,ps
|
6
|
+
am: hy
|
7
|
+
az: az
|
8
|
+
bd: bn
|
9
|
+
bh: ar
|
10
|
+
bn: ms
|
11
|
+
cn: zh
|
12
|
+
ge: ka
|
13
|
+
hk: zh,en
|
14
|
+
id: id
|
15
|
+
il: he
|
16
|
+
in: hi,en
|
17
|
+
iq: ar,ku
|
18
|
+
ir: fa
|
19
|
+
jo: ar
|
20
|
+
jp: ja
|
21
|
+
kg: ky,ru
|
22
|
+
kh: km
|
23
|
+
kp: ko
|
24
|
+
kr: ko,en
|
25
|
+
kw: ar
|
26
|
+
kz: kk,ru
|
27
|
+
la: lo
|
28
|
+
lb: ar,fr
|
29
|
+
lk: si,ta
|
30
|
+
lr: en
|
31
|
+
mm: my
|
32
|
+
mn: mn
|
33
|
+
mv: dv
|
34
|
+
my: ms
|
35
|
+
np: ne
|
36
|
+
om: ar
|
37
|
+
ph: en,tl
|
38
|
+
pk: en,ur
|
39
|
+
ps: ar,he
|
40
|
+
qa: ar
|
41
|
+
sa: ar
|
42
|
+
sg: en,ms,zh,ta
|
43
|
+
th: th
|
44
|
+
tj: tg,ru
|
45
|
+
tl: pt # tet
|
46
|
+
tm: tk
|
47
|
+
tw: zh
|
48
|
+
uz: uz # kaa
|
49
|
+
vn: vi
|
50
|
+
ye: ar
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
### europe
|
2
|
+
|
3
|
+
ad: ca
|
4
|
+
at: de
|
5
|
+
ba: bs,hr,sr
|
6
|
+
be: nl,fr,de
|
7
|
+
ch: de,fr,it,rm
|
8
|
+
bg: bg
|
9
|
+
by: be,ru
|
10
|
+
cy: el,tr
|
11
|
+
cz: cs
|
12
|
+
de: de
|
13
|
+
dk: da
|
14
|
+
ee: et
|
15
|
+
es: ca,es,eu,gl
|
16
|
+
fi: fi,sv,se
|
17
|
+
fo: fo
|
18
|
+
fr: fr
|
19
|
+
gb: en,ga,cy,gd,kw
|
20
|
+
gr: el
|
21
|
+
hr: hr
|
22
|
+
hu: hu
|
23
|
+
ie: en,ga
|
24
|
+
is: is
|
25
|
+
it: it,de,fr
|
26
|
+
li: de
|
27
|
+
lt: lt
|
28
|
+
lu: lb,fr,de
|
29
|
+
lv: lv
|
30
|
+
mc: fr
|
31
|
+
md: ru,uk,ro
|
32
|
+
mk: mk
|
33
|
+
mt: mt,en
|
34
|
+
nl: nl
|
35
|
+
no: nb,nn,no,se
|
36
|
+
pl: pl
|
37
|
+
pt: pt
|
38
|
+
ro: ro
|
39
|
+
rs: sr
|
40
|
+
ru: ru
|
41
|
+
se: sv
|
42
|
+
si: sl
|
43
|
+
sk: sk
|
44
|
+
sm: it
|
45
|
+
tr: tr
|
46
|
+
ua: uk
|
47
|
+
va: it
|
data/data/europe/countries.txt
CHANGED
@@ -57,32 +57,32 @@ eu, European Union, EUR, 4_324_782, 503_492_041, supra, g20 # NB: no FIFA c
|
|
57
57
|
|
58
58
|
## todo: auto-add tag eu?? for supra:eu?
|
59
59
|
|
60
|
-
at, Austria, AUT, 83_871, 8_414_638, supra:eu, un|fifa|uefa|
|
61
|
-
be, Belgium, BEL, 30_528, 11_007_020, supra:eu, un|fifa|uefa|
|
60
|
+
at, Austria, AUT, 83_871, 8_414_638, supra:eu, un|fifa|uefa|eu|euro|schengen|central_europe|western_europe
|
61
|
+
be, Belgium, BEL, 30_528, 11_007_020, supra:eu, un|fifa|uefa|eu|euro|schengen|benelux
|
62
62
|
cy, Cyprus, CYP, 9_251, 1_099_341, supra:eu, un|fifa|uefa|eu|euro|asia|western asia|middle_east|mediterranean|southern_europe|commonwealth
|
63
|
-
de, Germany, GER, 357_050, 81_799_600, supra:eu, un|fifa|uefa|
|
63
|
+
de, Germany, GER, 357_050, 81_799_600, supra:eu, un|fifa|uefa|eu|euro|schengen|g8|g20|western europe|central_europe
|
64
64
|
ee, Estonia, EST, 45_226, 1_340_194, supra:eu, un|fifa|uefa|eu|euro|schengen|uefa|northern europe|baltic|eastern_europe
|
65
65
|
es, Spain, ESP, 506_030, 46_030_109, supra:eu, un|fifa|uefa|eu|euro|schengen|southern_europe
|
66
66
|
fi, Finland, FIN, 338_145, 5_391_700, supra:eu, un|fifa|uefa|eu|euro|schengen|northern europe|nordic
|
67
67
|
fr, France, FRA, 674_843, 65_821_885, supra:eu, un|fifa|uefa|eu|euro|schengen|g8|g20|western_europe
|
68
|
-
gr, Greece, GRE, 131_990, 10_787_690, supra:eu, un|fifa|uefa|eu|euro|schengen|southern_europe
|
68
|
+
gr, Greece, GRE, 131_990, 10_787_690, supra:eu, un|fifa|uefa|eu|euro|schengen|southern_europe
|
69
69
|
ie, Ireland, IRL, 70_273, 4_722_028, supra:eu, un|fifa|uefa|eu|euro|northern europe|western_europe
|
70
70
|
it, Italy, ITA, 301_318, 60_681_514, supra:eu, un|fifa|uefa|eu|euro|schengen|g8|g20|southern_europe
|
71
71
|
lu, Luxembourg, LUX, 2_586, 511_840, supra:eu, un|fifa|uefa|eu|euro|schengen|benelux|western_europe
|
72
72
|
mt, Malta, MLT, 316, 417_608, supra:eu, un|fifa|uefa|eu|euro|schengen|microstate|mediterranean|southern_europe|commonwealth
|
73
73
|
nl, Netherlands, NED, 41_526, 16_703_700, supra:eu, un|fifa|uefa|eu|euro|schengen|benelux|western_europe
|
74
|
-
pt, Portugal, POR, 92_391, 10_647_763, supra:eu, un|fifa|uefa|eu|euro|schengen|southern_europe
|
74
|
+
pt, Portugal, POR, 92_391, 10_647_763, supra:eu, un|fifa|uefa|eu|euro|schengen|southern_europe
|
75
75
|
sk, Slovakia, SVK, 49_037, 5_440_078, supra:eu, un|fifa|uefa|eu|euro|schengen|central_europe
|
76
76
|
si, Slovenia, SVN, 20_273, 2_048_951, supra:eu, un|fifa|uefa|eu|euro|schengen|southern_europe|central_europe
|
77
77
|
|
78
|
-
bg, Bulgaria, BUL, 110_994, 7_364_570, supra:eu, un|fifa|uefa|eu|eastern_europe|southeastern_europe
|
78
|
+
bg, Bulgaria, BUL, 110_994, 7_364_570, supra:eu, un|fifa|uefa|eu|eastern_europe|southeastern_europe
|
79
79
|
dk, Denmark, DEN, 43_094, 5_564_219, supra:eu, un|fifa|uefa|eu|schengen|northern europe|scandinavia|nordic
|
80
80
|
lv, Latvija [Latvia], LVA, 64_589, 2_245_357, supra:eu, un|fifa|uefa|eu|schengen|northern europe|baltic
|
81
81
|
lt, Lithuania, LTU, 65_303, 3_207_060, supra:eu, un|fifa|uefa|eu|schengen|northern europe|baltic
|
82
82
|
pl, Poland, POL, 312_683, 38_186_860, supra:eu, un|fifa|uefa|eu|schengen|central_europe
|
83
83
|
ro, Romania, ROU, 238_391, 19_042_936, supra:eu, un|fifa|uefa|eu|eastern_europe|southeastern_europe
|
84
84
|
se, Sweden, SWE, 449_964, 9_415_570, supra:eu, un|fifa|uefa|eu|schengen|northern europe|scandinavia|nordic
|
85
|
-
cz, Czech Republic, CZE, 78_866, 10_535_811, supra:eu, un|fifa|uefa|
|
85
|
+
cz, Czech Republic, CZE, 78_866, 10_535_811, supra:eu, un|fifa|uefa|eu|schengen|central_europe
|
86
86
|
hu, Hungary, HUN, 93_030, 9_979_000, supra:eu, un|fifa|uefa|eu|schengen|central_europe
|
87
87
|
|
88
88
|
## todo/check: make a distinction between uk and gb?
|
@@ -102,8 +102,8 @@ fo, Faroe Islands, FRO, 1_399, 49_267, country:dk, fifa|uefa|northern
|
|
102
102
|
ad, Andorra, AND, 468, 85_082, un|fifa|uefa|microstate|western_europe
|
103
103
|
al, Albania, ALB, 28_748, 3_002_859, un|fifa|uefa|southern_europe|balkans|southeastern_europe
|
104
104
|
by, Belarus, BLR, 207_600, 9_643_566, un|fifa|uefa|eastern_europe
|
105
|
-
ch, Switzerland, SUI, 41_285, 7_866_500, un|fifa|uefa|
|
106
|
-
hr, Croatia, CRO, 56_594, 4_290_612, un|fifa|uefa|southern_europe|balkans|southeastern_europe
|
105
|
+
ch, Switzerland, SUI, 41_285, 7_866_500, un|fifa|uefa|schengen|western_europe|central_europe
|
106
|
+
hr, Croatia, CRO, 56_594, 4_290_612, un|fifa|uefa|southern_europe|balkans|southeastern_europe
|
107
107
|
rs, Serbia, SRB, 88_361, 7_276_604, un|fifa|uefa|southern_europe|balkans|southeastern_europe
|
108
108
|
ru, Russia, RUS, 17_098_242, 142_517_670, un|fifa|uefa|asia|north asia|g8|g20
|
109
109
|
tr, Turkey, TUR, 783_562, 79_749_461, un|fifa|uefa|asia|western asia|g20
|
data/data/langs.yml
ADDED
@@ -0,0 +1,210 @@
|
|
1
|
+
## list of languages
|
2
|
+
## use key / title e.g. en: English, de: Deutsch etc.
|
3
|
+
|
4
|
+
## for a list see http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
|
5
|
+
|
6
|
+
|
7
|
+
### languages in europe
|
8
|
+
|
9
|
+
sq: Albanian
|
10
|
+
eu: Basque
|
11
|
+
be: Belarusian
|
12
|
+
bs: Bosnian
|
13
|
+
br: Breton
|
14
|
+
bg: Bulgarian
|
15
|
+
ca: Catalan
|
16
|
+
kw: Cornish
|
17
|
+
co: Corsican
|
18
|
+
hr: Croatian
|
19
|
+
cs: Czech
|
20
|
+
da: Danish
|
21
|
+
nl: Dutch
|
22
|
+
en: English
|
23
|
+
et: Estonian
|
24
|
+
fo: Faroese
|
25
|
+
fi: Finnish
|
26
|
+
fr: French
|
27
|
+
gl: Galician
|
28
|
+
de: German
|
29
|
+
el: Greek
|
30
|
+
hu: Hungarian
|
31
|
+
ga: Irish
|
32
|
+
is: Icelandic
|
33
|
+
it: Italian
|
34
|
+
lb: Luxembourgish
|
35
|
+
lt: Lithuanian
|
36
|
+
lv: Latvian
|
37
|
+
mk: Macedonian
|
38
|
+
mt: Maltese
|
39
|
+
nb: Norwegian Bokmål
|
40
|
+
nn: Norwegian Nynorsk
|
41
|
+
no: Norwegian
|
42
|
+
pl: Polish
|
43
|
+
pt: Portuguese
|
44
|
+
rm: Romansh
|
45
|
+
ro: Romanian|Moldavian
|
46
|
+
ru: Russian
|
47
|
+
sc: Sardinian
|
48
|
+
sr: Serbian
|
49
|
+
gd: Scottish Gaelic|Gaelic
|
50
|
+
es: Spanish|Castilian
|
51
|
+
sv: Swedish
|
52
|
+
sk: Slovak
|
53
|
+
sl: Slovene
|
54
|
+
tr: Turkish
|
55
|
+
uk: Ukrainian
|
56
|
+
wa: Walloon
|
57
|
+
cy: Welsh
|
58
|
+
fy: Western Frisian
|
59
|
+
yi: Yiddish
|
60
|
+
kl: Kalaallisut|Greenlandic
|
61
|
+
|
62
|
+
### america
|
63
|
+
|
64
|
+
ay: Aymara
|
65
|
+
cr: Cree
|
66
|
+
gn: Guaraní
|
67
|
+
ht: Haitian
|
68
|
+
ik: Inupiaq
|
69
|
+
iu: Inuktitut
|
70
|
+
nv: Navajo
|
71
|
+
oj: Ojibwe
|
72
|
+
qu: Quechua
|
73
|
+
|
74
|
+
|
75
|
+
### todo: group/sort by continent
|
76
|
+
### africa / asia / oceania
|
77
|
+
|
78
|
+
ab: Abkhaz
|
79
|
+
aa: Afar
|
80
|
+
af: Afrikaans
|
81
|
+
ak: Akan
|
82
|
+
am: Amharic
|
83
|
+
ar: Arabic
|
84
|
+
an: Aragonese
|
85
|
+
hy: Armenian
|
86
|
+
as: Assamese
|
87
|
+
av: Avaric
|
88
|
+
ae: Avestan
|
89
|
+
az: Azerbaijani
|
90
|
+
bm: Bambara
|
91
|
+
ba: Bashkir
|
92
|
+
bn: Bengali
|
93
|
+
bh: Bihari
|
94
|
+
bi: Bislama
|
95
|
+
my: Burmese
|
96
|
+
ch: Chamorro
|
97
|
+
ce: Chechen
|
98
|
+
ny: Chichewa
|
99
|
+
zh: Chinese
|
100
|
+
cv: Chuvash
|
101
|
+
dv: Divehi
|
102
|
+
dz: Dzongkha
|
103
|
+
ee: Ewe
|
104
|
+
fj: Fijian
|
105
|
+
ff: Fula
|
106
|
+
ka: Georgian
|
107
|
+
gu: Gujarati
|
108
|
+
ha: Hausa
|
109
|
+
he: Hebrew
|
110
|
+
hz: Herero
|
111
|
+
hi: Hindi
|
112
|
+
ho: Hiri Motu
|
113
|
+
id: Indonesian
|
114
|
+
ig: Igbo
|
115
|
+
ja: Japanese
|
116
|
+
jv: Javanese
|
117
|
+
kn: Kannada
|
118
|
+
kr: Kanuri
|
119
|
+
ks: Kashmiri
|
120
|
+
kk: Kazakh
|
121
|
+
km: Khmer|Cambodian
|
122
|
+
ki: Kikuyu
|
123
|
+
rw: Kinyarwanda
|
124
|
+
ky: Kyrgyz
|
125
|
+
kv: Komi
|
126
|
+
kg: Kongo
|
127
|
+
ko: Korean
|
128
|
+
ku: Kurdish
|
129
|
+
kj: Kwanyama
|
130
|
+
lg: Ganda
|
131
|
+
li: Limburgish
|
132
|
+
ln: Lingala
|
133
|
+
lo: Lao
|
134
|
+
lu: Luba-Katanga
|
135
|
+
gv: Manx
|
136
|
+
mg: Malagasy
|
137
|
+
ms: Malay
|
138
|
+
ml: Malayalam
|
139
|
+
mi: Māori
|
140
|
+
mr: Marathi
|
141
|
+
mh: Marshallese
|
142
|
+
mn: Mongolian
|
143
|
+
na: Nauru
|
144
|
+
nd: North Ndebele
|
145
|
+
ne: Nepali
|
146
|
+
ng: Ndonga
|
147
|
+
ii: Nuosu
|
148
|
+
nr: South Ndebele
|
149
|
+
oc: Occitan
|
150
|
+
om: Oromo
|
151
|
+
or: Oriya
|
152
|
+
os: Ossetian|Ossetic
|
153
|
+
pa: Panjabi|Punjabi
|
154
|
+
pi: Pāli
|
155
|
+
fa: Persian
|
156
|
+
ps: Pashto
|
157
|
+
rn: Kirundi
|
158
|
+
sa: Sanskrit
|
159
|
+
sd: Sindhi
|
160
|
+
se: Northern Sami
|
161
|
+
sm: Samoan
|
162
|
+
sg: Sango
|
163
|
+
sn: Shona
|
164
|
+
si: Sinhala
|
165
|
+
so: Somali
|
166
|
+
st: Southern Sotho
|
167
|
+
su: Sundanese
|
168
|
+
sw: Swahili
|
169
|
+
ss: Swati
|
170
|
+
ta: Tamil
|
171
|
+
te: Telugu
|
172
|
+
tg: Tajik
|
173
|
+
th: Thai
|
174
|
+
ti: Tigrinya
|
175
|
+
bo: Tibetan
|
176
|
+
tk: Turkmen
|
177
|
+
tl: Tagalog
|
178
|
+
tn: Tswana
|
179
|
+
to: Tonga
|
180
|
+
ts: Tsonga
|
181
|
+
tt: Tatar
|
182
|
+
tw: Twi
|
183
|
+
ty: Tahitian
|
184
|
+
ug: Uighur
|
185
|
+
ur: Urdu
|
186
|
+
uz: Uzbek
|
187
|
+
ve: Venda
|
188
|
+
vi: Vietnamese
|
189
|
+
wo: Wolof
|
190
|
+
xh: Xhosa
|
191
|
+
yo: Yoruba
|
192
|
+
za: Zhuang
|
193
|
+
zu: Zulu
|
194
|
+
|
195
|
+
|
196
|
+
#####################
|
197
|
+
## constructed languages
|
198
|
+
|
199
|
+
## eo: Esperanto
|
200
|
+
## ia: Interlingua
|
201
|
+
## ie: Interlingue
|
202
|
+
## io: Ido
|
203
|
+
## vo: Volapük
|
204
|
+
|
205
|
+
|
206
|
+
#########################
|
207
|
+
## ancient
|
208
|
+
|
209
|
+
## la: Latin
|
210
|
+
## cu: Old Church Slavonic|Old Bulgarian
|
data/data/oceania/countries.txt
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
## - Polynesia
|
9
9
|
|
10
10
|
|
11
|
-
au, Australia, AUS, 7_686_850, 22_028_000, un|fifa|
|
11
|
+
au, Australia, AUS, 7_686_850, 22_028_000, un|fifa|g20|commonwealth
|
12
12
|
|
13
13
|
##############
|
14
14
|
### Melanesia
|
@@ -34,7 +34,7 @@ ki, Kiribati, KIR, 811, 103_500, Tarawa, un|micronesia|microstate|commonwealth
|
|
34
34
|
## Polynesia
|
35
35
|
## see http://en.wikipedia.org/wiki/Polynesia
|
36
36
|
|
37
|
-
nz, New Zealand, NZL, 268_680, 4_108_037, un|fifa|
|
37
|
+
nz, New Zealand, NZL, 268_680, 4_108_037, un|fifa|polynesia|commonwealth
|
38
38
|
ws, Samoa, SAM, 2_831, 194_320, Apia, un|fifa|polynesia|commonwealth
|
39
39
|
to, Tonga, TGA, 748, 103_036, Nuku'alofa, un|fifa|polynesia|microstate|commonwealth
|
40
40
|
tv, Tuvalu, TUV, 26, 10_544, Funafuti, un|polynesia|microstate|commonwealth # nb: use ISO code; NOT fifa member
|
data/data/tags.1.yml
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
### setup some grade 1 tags
|
2
|
+
|
3
|
+
|
4
|
+
###
|
5
|
+
# for 193 un members see www.un.org/en/members
|
6
|
+
|
7
|
+
|
8
|
+
## some tags
|
9
|
+
|
10
|
+
continents: africa, america, asia, europe, oceania
|
11
|
+
|
12
|
+
####################
|
13
|
+
# geo regions
|
14
|
+
africa: northern africa, western africa, central africa, eastern africa, southern africa
|
15
|
+
|
16
|
+
americas: north america, south america, central america, caribbean (islands)
|
17
|
+
|
18
|
+
## check if yaml can handle key w/ space??
|
19
|
+
south_america: andean states, southern cone
|
20
|
+
|
21
|
+
europe: northern europe, southern europe, western europe, central europe, eastern europe, southeastern europe
|
22
|
+
|
23
|
+
## more regions
|
24
|
+
europe2: baltic (states), scandinavia, nordic (countries), balkans
|
25
|
+
|
26
|
+
|
27
|
+
## get used for more than one continent
|
28
|
+
more: middle east, indian ocean, atlantic ocean
|
29
|
+
|
30
|
+
# orgs
|
31
|
+
|
32
|
+
orgs: un, g5, g8, g20, eu, commonwealth, mercosur, nafta
|
33
|
+
football: fifa, uefa, afc, ofc, caf, csf, concacaf
|
34
|
+
|
35
|
+
europe3: benelux, euro, schengen
|
36
|
+
|
37
|
+
misc: microstate
|
38
|
+
|
39
|
+
# national regions
|
40
|
+
brasil: s|South, se|Southeast, co|Centerwest, ne|Northeast, n|North
|
data/data/tags.3.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
### setup some grade 3 tags
|
2
|
+
|
3
|
+
|
4
|
+
####
|
5
|
+
|
6
|
+
# categorize countries into brackets by area
|
7
|
+
area: area_1_000_000_n_up,
|
8
|
+
area_100_000_to_1_000_000,
|
9
|
+
area_1_000_to_100_000,
|
10
|
+
area_1_000_n_less # microstate
|
11
|
+
|
12
|
+
# include all
|
13
|
+
area2: area_100_000_n_up, area_1_000_n_up
|
14
|
+
|
15
|
+
|
16
|
+
# categorize countries into brackets by pop(ulation)
|
17
|
+
pop: pop_100m_n_up,
|
18
|
+
pop_10m_to_100m,
|
19
|
+
pop_1m_to_10m,
|
20
|
+
pop_1m_n_less
|
21
|
+
|
22
|
+
# include all
|
23
|
+
pop2: pop_10m_n_up, pop_1m_n_up
|
@@ -5,6 +5,9 @@ module WorldDB::Models
|
|
5
5
|
class Country < ActiveRecord::Base
|
6
6
|
self.table_name = 'countries'
|
7
7
|
|
8
|
+
has_many :usages
|
9
|
+
has_many :langs, :through => :usages # lang(uage)s through usages (that is, countries_langs) join table
|
10
|
+
|
8
11
|
has_many :regions, :class_name => 'Region', :foreign_key => 'country_id'
|
9
12
|
has_many :cities, :class_name => 'City', :foreign_key => 'country_id'
|
10
13
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module WorldDB::Models
|
4
|
+
|
5
|
+
class Lang < ActiveRecord::Base
|
6
|
+
|
7
|
+
has_many :usages # join table for countries_langs
|
8
|
+
|
9
|
+
has_many :countries, :through => :usages
|
10
|
+
|
11
|
+
validates :key, :format => { :with => /^[a-z]{2}$/, :message => 'expected two lowercase letters a-z' }
|
12
|
+
|
13
|
+
end # class Lang
|
14
|
+
|
15
|
+
end # module WorldDB::Models
|
data/lib/worlddb/models/tag.rb
CHANGED
@@ -10,17 +10,21 @@ module WorldDB::Models
|
|
10
10
|
has_many :countries, :through => :taggings, :source => :taggable, :source_type => 'WorldDB::Models::Country', :class_name => 'Country'
|
11
11
|
has_many :regions, :through => :taggings, :source => :taggable, :source_type => 'WorldDB::Models::Region', :class_name => 'Region'
|
12
12
|
|
13
|
-
##
|
14
|
-
validates :key,
|
13
|
+
## nb: only allow spaces and underscores in between; do not allow a digit as first char
|
14
|
+
validates :key, :format => { :with => /^[a-z]$|^[a-z][a-z0-9_ ]*[a-z0-9]$/, :message => 'expected one or more lowercase letters a-z or 0-9 digits or space or underscore' }
|
15
15
|
|
16
16
|
scope :by_key, order( 'key desc' )
|
17
17
|
scope :by_title, order( 'title desc' )
|
18
|
+
scope :top, where( 'grade=1' )
|
18
19
|
|
19
20
|
before_save :on_before_save
|
20
21
|
|
21
22
|
def on_before_save
|
22
23
|
# replace space with underscore e.g. north america becomes north_america and so on
|
23
24
|
self.slug = key.gsub( ' ', '_' )
|
25
|
+
|
26
|
+
## if title is empty auto fill w/ key
|
27
|
+
self.title = key if title.blank?
|
24
28
|
end
|
25
29
|
|
26
30
|
end # class Tag
|
data/lib/worlddb/reader.rb
CHANGED
@@ -41,7 +41,11 @@ class Reader
|
|
41
41
|
|
42
42
|
def load_with_include_path( name, include_path )
|
43
43
|
|
44
|
-
if name =~
|
44
|
+
if name =~ /^lang/
|
45
|
+
load_langs_with_include_path( name, include_path )
|
46
|
+
elsif name =~ /\/lang/
|
47
|
+
load_usages_with_include_path( name, include_path )
|
48
|
+
elsif name =~ /\/fifa/
|
45
49
|
load_xxx_with_include_path( 'fifa', name, include_path )
|
46
50
|
elsif name =~ /\/iso3/
|
47
51
|
load_xxx_with_include_path( 'iso3', name, include_path )
|
@@ -49,6 +53,8 @@ class Reader
|
|
49
53
|
load_xxx_with_include_path( 'net', name, include_path )
|
50
54
|
elsif name =~ /\/motor/
|
51
55
|
load_xxx_with_include_path( 'motor', name, include_path )
|
56
|
+
elsif name =~ /^tag.*\.(\d)$/
|
57
|
+
load_tags_with_include_path( name, include_path, :grade => $1.to_i )
|
52
58
|
elsif name =~ /^([a-z]{3,})\/countries/ # e.g. africa/countries or america/countries
|
53
59
|
## auto-add continent (from folder structure) as tag
|
54
60
|
load_countries_with_include_path( name, include_path, :tags => $1 )
|
@@ -63,6 +69,7 @@ class Reader
|
|
63
69
|
# todo/fix: exit w/ error
|
64
70
|
end
|
65
71
|
end
|
72
|
+
|
66
73
|
|
67
74
|
def load_builtin( name ) ## convenience helper (requires proper named files w/ convention)
|
68
75
|
load_with_include_path( name, WorldDB.data_path )
|
@@ -102,6 +109,139 @@ class Reader
|
|
102
109
|
end
|
103
110
|
|
104
111
|
|
112
|
+
def load_langs_with_include_path( name, include_path )
|
113
|
+
path = "#{include_path}/#{name}.yml"
|
114
|
+
|
115
|
+
puts "*** parsing data '#{name}' (#{path})..."
|
116
|
+
|
117
|
+
reader = HashReader.new( logger, path )
|
118
|
+
|
119
|
+
reader.each do |key, value|
|
120
|
+
|
121
|
+
puts "adding lang >>#{key}<< >>#{value}<<..."
|
122
|
+
|
123
|
+
lang_key = key.strip
|
124
|
+
lang_title = value.strip
|
125
|
+
|
126
|
+
lang_attribs = {}
|
127
|
+
|
128
|
+
## check if it exists
|
129
|
+
lang = Lang.find_by_key( lang_key )
|
130
|
+
if lang.present?
|
131
|
+
puts "*** update lang #{lang.id}-#{lang.key}:"
|
132
|
+
else
|
133
|
+
puts "*** create lang:"
|
134
|
+
lang = Lang.new
|
135
|
+
lang_attribs[ :key ] = lang_key
|
136
|
+
end
|
137
|
+
|
138
|
+
lang_attribs[ :title ] = lang_title
|
139
|
+
|
140
|
+
puts lang_attribs.to_json
|
141
|
+
|
142
|
+
lang.update_attributes!( lang_attribs )
|
143
|
+
end # each key,value
|
144
|
+
|
145
|
+
Prop.create_from_worlddb_fixture!( name, path )
|
146
|
+
end
|
147
|
+
|
148
|
+
def load_langs_builtin( name )
|
149
|
+
load_langs_with_include_path( name, WorldDB.data_path )
|
150
|
+
end
|
151
|
+
|
152
|
+
|
153
|
+
def load_tags_with_include_path( name, include_path, more_values={} )
|
154
|
+
path = "#{include_path}/#{name}.yml"
|
155
|
+
|
156
|
+
puts "*** parsing data '#{name}' (#{path})..."
|
157
|
+
|
158
|
+
reader = HashReader.new( logger, path )
|
159
|
+
|
160
|
+
grade = 1
|
161
|
+
|
162
|
+
if more_values[:grade].present?
|
163
|
+
grade = more_values[:grade].to_i
|
164
|
+
end
|
165
|
+
|
166
|
+
reader.each do |key, value|
|
167
|
+
### split value by comma (e.g. northern america,southern america, etc.)
|
168
|
+
puts "adding grade #{grade} tags >>#{key}<< >>#{value}<<..."
|
169
|
+
tag_pairs = value.split(',')
|
170
|
+
tag_pairs.each do |pair|
|
171
|
+
## split key|title
|
172
|
+
values = pair.split('|')
|
173
|
+
|
174
|
+
key = values[0]
|
175
|
+
### remove (optional comment) from key (e.g. caribbean (islands))
|
176
|
+
key = key.gsub( /\(.+\)/, '' )
|
177
|
+
## remove leading n trailing space
|
178
|
+
key = key.strip
|
179
|
+
|
180
|
+
title = values[1] || '' # nb: title might be empty/missing
|
181
|
+
title = title.strip
|
182
|
+
|
183
|
+
tag_attribs = {}
|
184
|
+
|
185
|
+
## check if it exists
|
186
|
+
## todo/fix: add country_id for lookup?
|
187
|
+
tag = Tag.find_by_key( key )
|
188
|
+
if tag.present?
|
189
|
+
puts "*** update tag #{tag.id}-#{tag.key}:"
|
190
|
+
else
|
191
|
+
puts "*** create tag:"
|
192
|
+
tag = Tag.new
|
193
|
+
tag_attribs[ :key ] = key
|
194
|
+
end
|
195
|
+
|
196
|
+
tag_attribs[ :title ] = title
|
197
|
+
tag_attribs[ :grade ] = grade
|
198
|
+
|
199
|
+
puts tag_attribs.to_json
|
200
|
+
|
201
|
+
tag.update_attributes!( tag_attribs )
|
202
|
+
end
|
203
|
+
end # each key,value
|
204
|
+
|
205
|
+
Prop.create_from_worlddb_fixture!( name, path )
|
206
|
+
end # method load_tags_with_include_path
|
207
|
+
|
208
|
+
## Loads the tag fixture +name+ from the builtin data directory
## (WorldDB.data_path).
##
## name         - fixture name without extension (e.g. tags.1)
## include_path - ignored; builtin fixtures are always read from
##                WorldDB.data_path. Kept (now optional) only so existing
##                three-argument callers keep working.
## more_values  - extra settings handed on to load_tags_with_include_path
##                (e.g. :grade)
##
## nb: the other builtin loaders take just ( name ). To keep a call like
##   load_tags_builtin( name, :grade => 3 )
## from silently losing its settings in the unused include_path slot,
## a hash passed as second argument is treated as more_values.
def load_tags_builtin( name, include_path=nil, more_values={} )
  if include_path.is_a?( Hash ) && more_values.empty?
    more_values = include_path
  end

  load_tags_with_include_path( name, WorldDB.data_path, more_values )
end
|
211
|
+
|
212
|
+
|
213
|
+
## Reads the usage fixture "<include_path>/<name>.yml" and links countries
## to languages.
##
## Every fixture entry maps a country key to a comma-separated list of
## language keys; a parenthesised remark inside a language key is dropped.
## One Usage record (official: true, minor: false) gets created per
## country/language pair.
##
## Unknown country or language keys raise (find_by_key! is used for both).
## Registers the fixture with Prop.create_from_worlddb_fixture! when done.
def load_usages_with_include_path( name, include_path )
  path = "#{include_path}/#{name}.yml"

  puts "*** parsing data '#{name}' (#{path})..."

  HashReader.new( logger, path ).each do |key, value|
    puts " adding langs >>#{value}<<to country >>#{key}<<"

    country = Country.find_by_key!( key )

    value.split(',').each do |entry|
      ## remove (optional comment) plus leading n trailing space from lang key
      code = entry.gsub( /\(.+\)/, '' ).strip

      lang = Lang.find_by_key!( code )
      Usage.create!( country_id: country.id, lang_id: lang.id, official: true, minor: false )
    end
  end

  Prop.create_from_worlddb_fixture!( name, path )
end
|
240
|
+
|
241
|
+
## Loads the usage (country/language) fixture +name+ from the builtin data
## directory (WorldDB.data_path); the actual work is done by
## load_usages_with_include_path.
def load_usages_builtin( name )
  builtin_root = WorldDB.data_path
  load_usages_with_include_path( name, builtin_root )
end
|
244
|
+
|
105
245
|
def load_xxx_with_include_path( xxx, name, include_path )
|
106
246
|
path = "#{include_path}/#{name}.yml"
|
107
247
|
|
@@ -262,9 +402,9 @@ private
|
|
262
402
|
if area >= 1_000_000
|
263
403
|
value_tag_keys << 'area_1_000_000_n_up'
|
264
404
|
elsif area >= 100_000
|
265
|
-
value_tag_keys << '
|
405
|
+
value_tag_keys << 'area_100_000_to_1_000_000'
|
266
406
|
elsif area >= 1000
|
267
|
-
value_tag_keys << '
|
407
|
+
value_tag_keys << 'area_1_000_to_100_000'
|
268
408
|
else
|
269
409
|
value_tag_keys << 'area_1_000_n_less' # microstate
|
270
410
|
end
|
@@ -278,9 +418,9 @@ private
|
|
278
418
|
if pop >= 100_000_000
|
279
419
|
value_tag_keys << 'pop_100m_n_up'
|
280
420
|
elsif pop >= 10_000_000
|
281
|
-
value_tag_keys << '
|
421
|
+
value_tag_keys << 'pop_10m_to_100m'
|
282
422
|
elsif pop >= 1_000_000
|
283
|
-
value_tag_keys << '
|
423
|
+
value_tag_keys << 'pop_1m_to_10m'
|
284
424
|
else
|
285
425
|
value_tag_keys << 'pop_1m_n_less'
|
286
426
|
end
|
@@ -39,11 +39,25 @@ class HashReader
|
|
39
39
|
end
|
40
40
|
|
41
41
|
## quote implicit boolean types on,no,n,y
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
|
43
|
+
## nb: escape only if key e.g. no: or "free standing" value on its own line e.g.
|
44
|
+
## no: no
|
45
|
+
|
46
|
+
text = text.gsub( /^([ ]*)(ON|On|on|NO|No|no|N|n|Y|y)[ ]*:/ ) do |value|
|
47
|
+
puts "*** warn: hash reader - found implicit bool (#{$1}#{$2}) for key; adding quotes to turn into string; see yaml.org/refcard.html"
|
48
|
+
# nb: preserve leading spaces for structure - might be significant
|
49
|
+
"#{$1}'#{$2}':" # add quotes to turn it into a string (not bool e.g. true|false)
|
46
50
|
end
|
51
|
+
|
52
|
+
## nb: value must be freestanding (only allow optional eol comment)
|
53
|
+
## do not escape if part of string sequence e.g.
|
54
|
+
## key: nb,nn,no,se => nb,nn,'no',se -- avoid!!
|
55
|
+
|
56
|
+
text = text.gsub( /:[ ]+(ON|On|on|NO|No|no|N|n|Y|y)[ ]*($| #.*$)/ ) do |value|
|
57
|
+
puts "*** warn: hash reader - found implicit bool (#{$1}) for value; adding quotes to turn into string; see yaml.org/refcard.html"
|
58
|
+
": '#{$1}'" # add quotes to turn it into a string (not bool e.g. true|false)
|
59
|
+
end
|
60
|
+
|
47
61
|
|
48
62
|
@hash = YAML.load( text )
|
49
63
|
end
|
@@ -58,7 +72,7 @@ class HashReader
|
|
58
72
|
key = key_wild.to_s.strip
|
59
73
|
value = value_wild.to_s.strip
|
60
74
|
|
61
|
-
puts ">>#{key}
|
75
|
+
puts "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"
|
62
76
|
|
63
77
|
yield( key, value )
|
64
78
|
end
|
data/lib/worlddb/schema.rb
CHANGED
@@ -82,7 +82,8 @@ end
|
|
82
82
|
create_table :tags do |t|
|
83
83
|
t.string :key, :null => false
|
84
84
|
t.string :slug, :null => false
|
85
|
-
t.string :title # todo: make required?
|
85
|
+
t.string :title # todo: make required?
|
86
|
+
t.integer :grade, :null => false, :default => 1 # grade/tier e.g. 1/2/3 for now
|
86
87
|
## todo: add parent or similar for hierarchy (for tag stacks/packs)
|
87
88
|
t.timestamps
|
88
89
|
end
|
@@ -99,6 +100,21 @@ add_index :taggings, :tag_id
|
|
99
100
|
add_index :taggings, [:taggable_id, :taggable_type]
|
100
101
|
|
101
102
|
|
103
|
+
create_table :langs do |t| # langs == languages (e.g. en/English, de/Deutsch, etc.)
|
104
|
+
t.string :key, :null => false
|
105
|
+
t.string :title, :null => false
|
106
|
+
t.timestamps
|
107
|
+
end
|
108
|
+
|
109
|
+
create_table :usages do |t| # join table for countries_langs
|
110
|
+
t.references :country, :null => false
|
111
|
+
t.references :lang, :null => false
|
112
|
+
t.boolean :official, :null => false, :default => true # is_official language in country
|
113
|
+
t.boolean :minor, :null => false, :default => false # spoken by minority
|
114
|
+
t.float :percent # usage in percent e.g. 90.0, 0.55, etc.
|
115
|
+
t.timestamps
|
116
|
+
end
|
117
|
+
|
102
118
|
create_table :props do |t|
|
103
119
|
t.string :key, :null => false
|
104
120
|
t.string :value, :null => false
|
data/lib/worlddb/version.rb
CHANGED
data/lib/worlddb.rb
CHANGED
@@ -28,6 +28,8 @@ require 'worlddb/models/region'
|
|
28
28
|
require 'worlddb/models/city'
|
29
29
|
require 'worlddb/models/tag'
|
30
30
|
require 'worlddb/models/tagging'
|
31
|
+
require 'worlddb/models/lang'
|
32
|
+
require 'worlddb/models/usage'
|
31
33
|
require 'worlddb/schema' # NB: requires worlddb/models (include WorldDB::Models)
|
32
34
|
require 'worlddb/utils'
|
33
35
|
require 'worlddb/readers/code_reader'
|
@@ -62,6 +64,9 @@ module WorldDB
|
|
62
64
|
end
|
63
65
|
|
64
66
|
def self.fixtures # all builtin fixtures; helper for covenience
|
67
|
+
tag_fixtures +
|
68
|
+
country_fixtures +
|
69
|
+
lang_fixtures +
|
65
70
|
africa_fixtures +
|
66
71
|
america_fixtures +
|
67
72
|
europe_fixtures +
|
@@ -69,16 +74,37 @@ module WorldDB
|
|
69
74
|
oceania_fixtures
|
70
75
|
end
|
71
76
|
|
77
|
+
|
78
|
+
## Names of the builtin tag fixtures.
def self.tag_fixtures
  %w[tags.1 tags.3]
end
|
81
|
+
|
82
|
+
## Names of the builtin country fixtures - one <continent>/countries entry
## per continent.
def self.country_fixtures
  %w[africa america asia europe oceania].map { |continent| "#{continent}/countries" }
end
|
89
|
+
|
90
|
+
## Names of the builtin language fixtures - the langs list first, followed
## by one <continent>/3_more/lang entry per continent.
def self.lang_fixtures
  per_continent = %w[africa america asia europe oceania].map { |continent| "#{continent}/3_more/lang" }
  ['langs'] + per_continent
end
|
99
|
+
|
72
100
|
def self.africa_fixtures
|
73
|
-
['
|
74
|
-
|
75
|
-
|
76
|
-
'1_codes/iso3'].map { |path| "africa/#{path}" }
|
101
|
+
['1_codes/fifa',
|
102
|
+
'1_codes/internet',
|
103
|
+
'1_codes/iso3'].map { |path| "africa/#{path}" }
|
77
104
|
end
|
78
105
|
|
79
106
|
def self.america_fixtures
|
80
|
-
['
|
81
|
-
'1_codes/fifa',
|
107
|
+
['1_codes/fifa',
|
82
108
|
'1_codes/internet',
|
83
109
|
'1_codes/iso3',
|
84
110
|
'1_codes/motor',
|
@@ -94,16 +120,14 @@ module WorldDB
|
|
94
120
|
end
|
95
121
|
|
96
122
|
def self.asia_fixtures
|
97
|
-
['
|
98
|
-
'1_codes/fifa',
|
123
|
+
['1_codes/fifa',
|
99
124
|
'1_codes/internet',
|
100
125
|
'1_codes/iso3',
|
101
126
|
'jp/cities'].map { |path| "asia/#{path}" }
|
102
127
|
end
|
103
128
|
|
104
129
|
def self.europe_fixtures
|
105
|
-
['
|
106
|
-
'1_codes/fifa',
|
130
|
+
['1_codes/fifa',
|
107
131
|
'1_codes/internet',
|
108
132
|
'1_codes/iso3',
|
109
133
|
'1_codes/motor',
|
@@ -150,11 +174,10 @@ module WorldDB
|
|
150
174
|
end
|
151
175
|
|
152
176
|
def self.oceania_fixtures
|
153
|
-
['
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
'au/cities'].map { |path| "oceania/#{path}" }
|
177
|
+
['1_codes/fifa',
|
178
|
+
'1_codes/internet',
|
179
|
+
'1_codes/iso3',
|
180
|
+
'au/cities'].map { |path| "oceania/#{path}" }
|
158
181
|
end
|
159
182
|
|
160
183
|
## todo/fix: rename to load/load_all - why? why not?? or just add an alias?
|
@@ -198,6 +221,8 @@ module WorldDB
|
|
198
221
|
City.delete_all
|
199
222
|
Region.delete_all
|
200
223
|
Country.delete_all
|
224
|
+
Usage.delete_all
|
225
|
+
Lang.delete_all
|
201
226
|
Prop.delete_all
|
202
227
|
end
|
203
228
|
|
@@ -221,6 +246,8 @@ module WorldDB
|
|
221
246
|
puts " #{'%5d' % City.where(d: true).count} districts"
|
222
247
|
puts " #{'%5d' % Tag.count} tags"
|
223
248
|
puts " #{'%5d' % Tagging.count} taggings"
|
249
|
+
puts " #{'%5d' % Lang.count} langs"
|
250
|
+
puts " #{'%5d' % Usage.count} usages"
|
224
251
|
end
|
225
252
|
|
226
253
|
def props
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: worlddb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 63
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 8
|
9
|
+
- 0
|
10
|
+
version: 0.8.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gerald Bauer
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-12-
|
18
|
+
date: 2012-12-09 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: activerecord
|
@@ -81,6 +81,7 @@ files:
|
|
81
81
|
- data/africa/1_codes/iso3.yml
|
82
82
|
- data/africa/2_names/de.yml
|
83
83
|
- data/africa/3_more/en.wikipedia.yml
|
84
|
+
- data/africa/3_more/lang.yml
|
84
85
|
- data/africa/countries.txt
|
85
86
|
- data/america/1_codes/fifa.yml
|
86
87
|
- data/america/1_codes/internet.yml
|
@@ -89,6 +90,7 @@ files:
|
|
89
90
|
- data/america/2_names/de.yml
|
90
91
|
- data/america/2_names/es.yml
|
91
92
|
- data/america/3_more/en.wikipedia.yml
|
93
|
+
- data/america/3_more/lang.yml
|
92
94
|
- data/america/br/regions.txt
|
93
95
|
- data/america/ca/cities.txt
|
94
96
|
- data/america/ca/regions.txt
|
@@ -104,6 +106,7 @@ files:
|
|
104
106
|
- data/asia/1_codes/iso3.yml
|
105
107
|
- data/asia/2_names/de.yml
|
106
108
|
- data/asia/3_more/en.wikipedia.yml
|
109
|
+
- data/asia/3_more/lang.yml
|
107
110
|
- data/asia/countries.txt
|
108
111
|
- data/asia/jp/cities.txt
|
109
112
|
- data/europe/1_codes/fifa.yml
|
@@ -113,6 +116,7 @@ files:
|
|
113
116
|
- data/europe/2_names/de.yml
|
114
117
|
- data/europe/2_names/es.yml
|
115
118
|
- data/europe/3_more/en.wikipedia.yml
|
119
|
+
- data/europe/3_more/lang.yml
|
116
120
|
- data/europe/at/cities.txt
|
117
121
|
- data/europe/at/regions.txt
|
118
122
|
- data/europe/be/cities.txt
|
@@ -154,24 +158,29 @@ files:
|
|
154
158
|
- data/europe/tr/cities.txt
|
155
159
|
- data/europe/ua/cities.txt
|
156
160
|
- data/europe/wa/cities.txt
|
161
|
+
- data/langs.yml
|
157
162
|
- data/oceania/1_codes/fifa.yml
|
158
163
|
- data/oceania/1_codes/internet.yml
|
159
164
|
- data/oceania/1_codes/iso3.yml
|
160
165
|
- data/oceania/2_names/de.yml
|
161
166
|
- data/oceania/3_more/en.wikipedia.yml
|
167
|
+
- data/oceania/3_more/lang.yml
|
162
168
|
- data/oceania/au/cities.txt
|
163
169
|
- data/oceania/countries.txt
|
164
|
-
- data/tags.yml
|
170
|
+
- data/tags.1.yml
|
171
|
+
- data/tags.3.yml
|
165
172
|
- lib/worlddb.rb
|
166
173
|
- lib/worlddb/cli/opts.rb
|
167
174
|
- lib/worlddb/cli/runner.rb
|
168
175
|
- lib/worlddb/console.rb
|
169
176
|
- lib/worlddb/models/city.rb
|
170
177
|
- lib/worlddb/models/country.rb
|
178
|
+
- lib/worlddb/models/lang.rb
|
171
179
|
- lib/worlddb/models/prop.rb
|
172
180
|
- lib/worlddb/models/region.rb
|
173
181
|
- lib/worlddb/models/tag.rb
|
174
182
|
- lib/worlddb/models/tagging.rb
|
183
|
+
- lib/worlddb/models/usage.rb
|
175
184
|
- lib/worlddb/reader.rb
|
176
185
|
- lib/worlddb/readers/code_reader.rb
|
177
186
|
- lib/worlddb/readers/hash_reader.rb
|
data/data/tags.yml
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
###
|
3
|
-
# for 193 un members see www.un.org/en/members
|
4
|
-
|
5
|
-
|
6
|
-
## some tags
|
7
|
-
|
8
|
-
continents: europe, america, asia, africa, oceania
|
9
|
-
americas: north america, south america, central america, caribbean islands
|
10
|
-
orgs: un, eu, mercosur, nafta
|
11
|
-
football: uefa, afc, ofc, caf, csf, concacaf
|
12
|
-
|
13
|
-
europe: benelux,euro
|
14
|
-
|
15
|
-
|
16
|
-
# regions
|
17
|
-
brasil: s|South,se|Southeast,co|Centerwest,ne|Northeast,n|North
|