factbook 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +5 -0
- data/README.md +6 -3
- data/Rakefile +309 -17
- data/lib/factbook/page.rb +58 -201
- data/lib/factbook/sect.rb +179 -0
- data/lib/factbook/version.rb +1 -1
- data/lib/factbook.rb +2 -0
- data/test/data/countrytemplate_ee.html +2999 -0
- data/test/data/countrytemplate_ls.html +2728 -0
- data/test/data/countrytemplate_vt.html +1726 -0
- data/test/data/countrytemplate_xx.html +2898 -0
- data/test/test_json.rb +31 -29
- data/test/test_page.rb +18 -209
- data/test/test_page_old.rb +191 -3
- metadata +17 -12
data/Manifest.txt
CHANGED
@@ -4,11 +4,16 @@ README.md
|
|
4
4
|
Rakefile
|
5
5
|
lib/factbook.rb
|
6
6
|
lib/factbook/page.rb
|
7
|
+
lib/factbook/sect.rb
|
7
8
|
lib/factbook/version.rb
|
8
9
|
test/data/countrytemplate_au.html
|
9
10
|
test/data/countrytemplate_be.html
|
10
11
|
test/data/countrytemplate_br.html
|
12
|
+
test/data/countrytemplate_ee.html
|
13
|
+
test/data/countrytemplate_ls.html
|
11
14
|
test/data/countrytemplate_mx.html
|
15
|
+
test/data/countrytemplate_vt.html
|
16
|
+
test/data/countrytemplate_xx.html
|
12
17
|
test/helper.rb
|
13
18
|
test/test_json.rb
|
14
19
|
test/test_page.rb
|
data/README.md
CHANGED
@@ -21,8 +21,9 @@ offers free country profiles in the public domain (that is, no copyright(s), no
|
|
21
21
|
|
22
22
|
### Get country profile page as a hash (that is, structured data e.g. nested key/values)
|
23
23
|
|
24
|
-
page = Factbook::Page.new( 'br' )
|
25
|
-
pp page.data
|
24
|
+
page = Factbook::Page.new( 'br' ) # br is the country code for Brazil
|
25
|
+
pp page.data # pretty print hash
|
26
|
+
|
26
27
|
|
27
28
|
### Save to disk as JSON
|
28
29
|
|
@@ -39,7 +40,9 @@ Just install the gem:
|
|
39
40
|
$ gem install factbook
|
40
41
|
|
41
42
|
|
42
|
-
## Ready-To-Use Public Domain Datasets
|
43
|
+
## Ready-To-Use Public Domain Datasets
|
44
|
+
|
45
|
+
Datasets generated by `factbook` include:
|
43
46
|
|
44
47
|
[openmundi/factbook.json](https://github.com/openmundi/factbook.json) - open (public domain)
|
45
48
|
factbook country profiles in JSON for all the world's countries (using internet domain names
|
data/Rakefile
CHANGED
@@ -32,17 +32,301 @@ Hoe.spec 'factbook' do
|
|
32
32
|
end
|
33
33
|
|
34
34
|
|
35
|
+
=begin
|
36
|
+
# errors to fix:
|
37
|
+
saving a copy to europe/li-liechtenstein.html for debugging
|
38
|
+
found section 0 @ 38
|
39
|
+
found section 1 @ 1882
|
40
|
+
found section 2 @ 13160
|
41
|
+
found section 3 @ 29355
|
42
|
+
found section 4 @ 46010
|
43
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
44
|
+
found section 6 @ 64725
|
45
|
+
|
46
|
+
saving a copy to europe/mc-monaco.html for debugging
|
47
|
+
found section 0 @ 38
|
48
|
+
found section 1 @ 1446
|
49
|
+
found section 2 @ 12736
|
50
|
+
found section 3 @ 31192
|
51
|
+
found section 4 @ 47762
|
52
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
53
|
+
|
54
|
+
saving a copy to europe/sm-san-marino.html for debugging
|
55
|
+
found section 0 @ 38
|
56
|
+
found section 1 @ 1379
|
57
|
+
found section 2 @ 12243
|
58
|
+
found section 3 @ 27349
|
59
|
+
found section 4 @ 46949
|
60
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
61
|
+
|
62
|
+
saving a copy to europe/va-vatican-city.html for debugging
|
63
|
+
found section 0 @ 38
|
64
|
+
found section 1 @ 2000
|
65
|
+
found section 2 @ 13093
|
66
|
+
found section 3 @ 19912
|
67
|
+
found section 4 @ 37264
|
68
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
69
|
+
found section 6 @ 44353
|
70
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Trans"
|
71
|
+
|
72
|
+
saving a copy to pacific/mh-marshall-islands.html for debugging
|
73
|
+
found section 0 @ 38
|
74
|
+
found section 1 @ 1414
|
75
|
+
found section 2 @ 13404
|
76
|
+
found section 3 @ 34854
|
77
|
+
found section 4 @ 52734
|
78
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
79
|
+
|
80
|
+
saving a copy to pacific/pw-palau.html for debugging
|
81
|
+
found section 0 @ 38
|
82
|
+
found section 1 @ 1338
|
83
|
+
found section 2 @ 12729
|
84
|
+
found section 3 @ 34145
|
85
|
+
found section 4 @ 51005
|
86
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
87
|
+
|
88
|
+
saving a copy to pacific/tv-tuvalu.html for debugging
|
89
|
+
found section 0 @ 38
|
90
|
+
found section 1 @ 1391
|
91
|
+
found section 2 @ 13580
|
92
|
+
found section 3 @ 33729
|
93
|
+
found section 4 @ 50390
|
94
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
95
|
+
|
96
|
+
saving a copy to africa/ss-south-sudan.html for debugging
|
97
|
+
found section 0 @ 38
|
98
|
+
found section 1 @ 2560
|
99
|
+
found section 2 @ 11342
|
100
|
+
found section 3 @ 26234
|
101
|
+
found section 4 @ 42271
|
102
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
103
|
+
|
104
|
+
|
105
|
+
=end
|
106
|
+
|
107
|
+
|
35
108
|
|
36
109
|
desc 'generate json for factbook.json repo'
|
37
110
|
task :genjson do
|
38
111
|
require 'factbook'
|
39
112
|
|
40
113
|
countries = [
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
114
|
+
=begin
|
115
|
+
['xx', 'world' ], ## special code for the world
|
116
|
+
|
117
|
+
['ee', 'europe/eu-european-union'], ## special code for the european union
|
118
|
+
['al', 'europe/al-albania' ],
|
119
|
+
['an', 'europe/ad-andorra' ],
|
120
|
+
['am', 'europe/am-armenia' ],
|
121
|
+
['au', 'europe/at-austria' ],
|
122
|
+
['aj', 'europe/az-azerbaijan' ],
|
123
|
+
['bo', 'europe/by-belarus' ],
|
124
|
+
['be', 'europe/be-belgium' ],
|
125
|
+
['bk', 'europe/ba-bosnia-n-herzegovina' ],
|
126
|
+
['bu', 'europe/bg-bulgaria' ],
|
127
|
+
['hr', 'europe/hr-croatia' ],
|
128
|
+
['cy', 'europe/cy-cyprus' ],
|
129
|
+
['ez', 'europe/cz-czech-republic' ],
|
130
|
+
['da', 'europe/dk-denmark' ],
|
131
|
+
['en', 'europe/ee-estonia' ],
|
132
|
+
['fi', 'europe/fi-finland' ],
|
133
|
+
['fr', 'europe/fr-france' ],
|
134
|
+
['gg', 'europe/ge-georgia' ],
|
135
|
+
['gm', 'europe/de-germany' ],
|
136
|
+
['uk', 'europe/gb-great-britain' ],
|
137
|
+
['gr', 'europe/gr-greece' ],
|
138
|
+
['hu', 'europe/hu-hungary' ],
|
139
|
+
['ic', 'europe/is-iceland' ],
|
140
|
+
['ei', 'europe/ie-ireland' ],
|
141
|
+
['it', 'europe/it-italy' ],
|
142
|
+
['lg', 'europe/lv-latvia' ],
|
143
|
+
# ['ls', 'europe/li-liechtenstein' ],
|
144
|
+
['lh', 'europe/lt-lithuania' ],
|
145
|
+
['lu', 'europe/lu-luxembourg' ],
|
146
|
+
['mk', 'europe/mk-macedonia' ],
|
147
|
+
['mt', 'europe/mt-malta' ],
|
148
|
+
['md', 'europe/md-moldova' ],
|
149
|
+
# ['mn', 'europe/mc-monaco' ],
|
150
|
+
['mj', 'europe/me-montenegro' ],
|
151
|
+
['nl', 'europe/nl-netherlands' ],
|
152
|
+
['no', 'europe/no-norway' ],
|
153
|
+
['pl', 'europe/pl-poland' ],
|
154
|
+
['po', 'europe/pt-portugal' ],
|
155
|
+
['ro', 'europe/ro-romania' ],
|
156
|
+
['rs', 'europe/ru-russia' ],
|
157
|
+
# ['sm', 'europe/sm-san-marino' ],
|
158
|
+
['ri', 'europe/rs-serbia' ],
|
159
|
+
['lo', 'europe/sk-slovakia' ],
|
160
|
+
['si', 'europe/si-slovenia' ],
|
161
|
+
['sp', 'europe/es-spain' ],
|
162
|
+
['sw', 'europe/se-sweden' ],
|
163
|
+
['sz', 'europe/ch-switzerland' ],
|
164
|
+
['tu', 'europe/tr-turkey' ],
|
165
|
+
['up', 'europe/ua-ukraine' ],
|
166
|
+
# ['vt', 'europe/va-vatican-city' ],
|
167
|
+
|
168
|
+
['ca', 'north-america/ca-canada' ],
|
169
|
+
['us', 'north-america/us-united-states' ],
|
170
|
+
['mx', 'north-america/mx-mexico' ],
|
171
|
+
|
172
|
+
['ac', 'caribbean/ag-antigua-n-barbuda' ],
|
173
|
+
['bf', 'caribbean/bs-bahamas' ],
|
174
|
+
['bb', 'caribbean/bb-barbados' ],
|
175
|
+
['cu', 'caribbean/cu-cuba' ],
|
176
|
+
['do', 'caribbean/dm-dominica' ],
|
177
|
+
['dr', 'caribbean/do-dominican-republic' ],
|
178
|
+
['gj', 'caribbean/gd-grenada' ],
|
179
|
+
['ha', 'caribbean/ht-haiti' ],
|
180
|
+
['jm', 'caribbean/jm-jamaica' ],
|
181
|
+
['sc', 'caribbean/kn-saint-kitts-n-nevis' ],
|
182
|
+
['st', 'caribbean/lc-saint-lucia' ],
|
183
|
+
['vc', 'caribbean/vc-saint-vincent-n-the-grenadines' ],
|
184
|
+
['td', 'caribbean/tt-trinidad-n-tobago' ],
|
185
|
+
|
186
|
+
['bh', 'central-america/bz-belize' ],
|
187
|
+
['cs', 'central-america/cr-costa-rica' ],
|
188
|
+
['es', 'central-america/sv-el-salvador' ],
|
189
|
+
['gt', 'central-america/gt-guatemala' ],
|
190
|
+
['ho', 'central-america/hn-honduras' ],
|
191
|
+
['nu', 'central-america/ni-nicaragua' ],
|
192
|
+
['pm', 'central-america/pa-panama' ],
|
193
|
+
|
194
|
+
['ar', 'south-america/ar-argentina' ],
|
195
|
+
['bl', 'south-america/bo-bolivia' ],
|
196
|
+
['br', 'south-america/br-brazil' ],
|
197
|
+
['ci', 'south-america/cl-chile' ],
|
198
|
+
['co', 'south-america/co-colombia' ],
|
199
|
+
['ec', 'south-america/ec-ecuador' ],
|
200
|
+
['gy', 'south-america/gy-guyana' ],
|
201
|
+
['pa', 'south-america/py-paraguay' ],
|
202
|
+
['pe', 'south-america/pe-peru' ],
|
203
|
+
['ns', 'south-america/sr-suriname' ],
|
204
|
+
['uy', 'south-america/uy-uruguay' ],
|
205
|
+
['ve', 'south-america/ve-venezuela' ],
|
206
|
+
|
207
|
+
['ag', 'africa/dz-algeria' ],
|
208
|
+
['ao', 'africa/ao-angola' ],
|
209
|
+
['bn', 'africa/bj-benin' ],
|
210
|
+
['bc', 'africa/bw-botswana' ],
|
211
|
+
['uv', 'africa/bf-burkina-faso' ],
|
212
|
+
['by', 'africa/bi-burundi' ],
|
213
|
+
['cm', 'africa/cm-cameroon' ],
|
214
|
+
['cv', 'africa/cv-cape-verde' ],
|
215
|
+
['ct', 'africa/cf-central-african-republic' ],
|
216
|
+
['cd', 'africa/td-chad' ],
|
217
|
+
['cn', 'africa/km-comoros' ],
|
218
|
+
['cf', 'africa/cg-congo' ],
|
219
|
+
['cg', 'africa/cd-congo-dr' ],
|
220
|
+
['iv', 'africa/ci-cote-d-ivoire' ],
|
221
|
+
['dj', 'africa/dj-djibouti' ],
|
222
|
+
['eg', 'africa/eg-egypt' ],
|
223
|
+
['ek', 'africa/gq-equatorial-guinea' ],
|
224
|
+
['er', 'africa/er-eritrea' ],
|
225
|
+
['et', 'africa/et-ethiopia' ],
|
226
|
+
['gb', 'africa/ga-gabon' ],
|
227
|
+
['ga', 'africa/gm-gambia' ],
|
228
|
+
['gh', 'africa/gh-ghana' ],
|
229
|
+
['gv', 'africa/gn-guinea' ],
|
230
|
+
['pu', 'africa/gw-guinea-bissau' ],
|
231
|
+
['ke', 'africa/ke-kenya' ],
|
232
|
+
['lt', 'africa/ls-lesotho' ],
|
233
|
+
['li', 'africa/lr-liberia' ],
|
234
|
+
['ly', 'africa/ly-libya' ],
|
235
|
+
['ma', 'africa/mg-madagascar' ],
|
236
|
+
['mi', 'africa/mw-malawi' ],
|
237
|
+
['ml', 'africa/ml-mali' ],
|
238
|
+
['mr', 'africa/mr-mauritania' ],
|
239
|
+
['mp', 'africa/mu-mauritius' ],
|
240
|
+
['mo', 'africa/ma-morocco' ],
|
241
|
+
['mz', 'africa/mz-mozambique' ],
|
242
|
+
['wa', 'africa/na-namibia' ],
|
243
|
+
['ng', 'africa/ne-niger' ],
|
244
|
+
['ni', 'africa/ng-nigeria' ],
|
245
|
+
['rw', 'africa/rw-rwanda' ],
|
246
|
+
['tp', 'africa/st-st-sao-tome-n-principe' ],
|
247
|
+
['sg', 'africa/sn-senegal' ],
|
248
|
+
['se', 'africa/sc-seychelles' ],
|
249
|
+
['sl', 'africa/sl-sierra-leone' ],
|
250
|
+
['so', 'africa/so-somalia' ],
|
251
|
+
['sf', 'africa/za-south-africa' ],
|
252
|
+
# ['od', 'africa/ss-south-sudan' ],
|
253
|
+
['su', 'africa/sd-sudan' ],
|
254
|
+
['wz', 'africa/sz-swaziland' ],
|
255
|
+
['tz', 'africa/tz-tanzania' ],
|
256
|
+
['to', 'africa/tg-togo' ],
|
257
|
+
['ts', 'africa/tn-tunisia' ],
|
258
|
+
['ug', 'africa/ug-uganda' ],
|
259
|
+
['za', 'africa/zm-zambia' ],
|
260
|
+
['zi', 'africa/zw-zimbabwe' ],
|
261
|
+
|
262
|
+
['ba', 'middle-east/bh-bahrain' ],
|
263
|
+
['ir', 'middle-east/ir-iran' ],
|
264
|
+
['iz', 'middle-east/iq-iraq' ],
|
265
|
+
['is', 'middle-east/il-israel' ],
|
266
|
+
['jo', 'middle-east/jo-jordan' ],
|
267
|
+
['ku', 'middle-east/kw-kuwait' ],
|
268
|
+
['le', 'middle-east/lb-lebanon' ],
|
269
|
+
['mu', 'middle-east/om-oman' ],
|
270
|
+
### ['??', 'middle-east/ps-palestine' ], -- incl. gaza strip n west bank
|
271
|
+
['qa', 'middle-east/qa-qatar' ],
|
272
|
+
['sa', 'middle-east/sa-saudi-arabia' ],
|
273
|
+
['sy', 'middle-east/sy-syria' ],
|
274
|
+
['ae', 'middle-east/ae-united-arab-emirates' ],
|
275
|
+
['ym', 'middle-east/ye-yemen' ],
|
276
|
+
|
277
|
+
['af', 'asia/af-afghanistan' ],
|
278
|
+
['bg', 'asia/bd-bangladesh' ],
|
279
|
+
['bt', 'asia/bt-bhutan' ],
|
280
|
+
['bx', 'asia/bn-brunei' ],
|
281
|
+
['cb', 'asia/kh-cambodia' ],
|
282
|
+
['ch', 'asia/cn-china' ],
|
283
|
+
['in', 'asia/in-india' ],
|
284
|
+
['id', 'asia/id-indonesia' ],
|
285
|
+
['ja', 'asia/jp-japan' ],
|
286
|
+
['kz', 'asia/kz-kazakhstan' ],
|
287
|
+
['kg', 'asia/kg-kyrgyzstan' ],
|
288
|
+
['la', 'asia/la-laos' ],
|
289
|
+
['my', 'asia/my-malaysia' ],
|
290
|
+
['mv', 'asia/mv-maldives' ],
|
291
|
+
['mg', 'asia/mn-mongolia' ],
|
292
|
+
['bm', 'asia/mm-myanmar' ], ## still using Burma
|
293
|
+
['np', 'asia/np-nepal' ],
|
294
|
+
['kn', 'asia/kp-north-korea' ],
|
295
|
+
['pk', 'asia/pk-pakistan' ],
|
296
|
+
['rp', 'asia/ph-philippines' ],
|
297
|
+
['sn', 'asia/sg-singapore' ],
|
298
|
+
['ks', 'asia/kr-south-korea' ],
|
299
|
+
['ce', 'asia/lk-sri-lanka' ],
|
300
|
+
['tw', 'asia/tw-taiwan' ],
|
301
|
+
['ti', 'asia/tj-tajikistan' ],
|
302
|
+
['th', 'asia/th-thailand' ],
|
303
|
+
['tt', 'asia/tl-timor-leste' ],
|
304
|
+
['tx', 'asia/tm-turkmenistan' ],
|
305
|
+
['uz', 'asia/uz-uzbekistan' ],
|
306
|
+
['vm', 'asia/vn-vietnam' ],
|
307
|
+
|
308
|
+
['as', 'pacific/au-australia' ],
|
309
|
+
['fj', 'pacific/fj-fiji' ],
|
310
|
+
['kr', 'pacific/ki-kiribati' ],
|
311
|
+
# ['rm', 'pacific/mh-marshall-islands' ],
|
312
|
+
['fm', 'pacific/fm-micronesia' ],
|
313
|
+
['nr', 'pacific/nr-nauru' ],
|
314
|
+
['nz', 'pacific/nz-new-zealand' ],
|
315
|
+
# ['ps', 'pacific/pw-palau' ],
|
316
|
+
['pp', 'pacific/pg-papua-new-guinea' ],
|
317
|
+
['ws', 'pacific/ws-samoa' ],
|
318
|
+
['bp', 'pacific/sb-solomon-islands' ],
|
319
|
+
['tn', 'pacific/to-tonga' ],
|
320
|
+
# ['tv', 'pacific/tv-tuvalu' ],
|
321
|
+
['nh', 'pacific/vu-vanuatu' ],
|
322
|
+
|
323
|
+
=end
|
324
|
+
|
325
|
+
|
326
|
+
|
327
|
+
=begin
|
328
|
+
['', 'africa/' ],
|
329
|
+
=end
|
46
330
|
]
|
47
331
|
|
48
332
|
countries.each do |country|
|
@@ -51,29 +335,37 @@ task :genjson do
|
|
51
335
|
end
|
52
336
|
|
53
337
|
|
54
|
-
def gen_json_for(
|
55
|
-
|
56
|
-
|
57
|
-
|
338
|
+
def gen_json_for( country )
|
339
|
+
|
340
|
+
country_code = country[0]
|
341
|
+
country_path = country[1]
|
342
|
+
|
343
|
+
path_html = "tmp/html/#{country_path}.html"
|
344
|
+
path_json = "tmp/json/#{country_path}.json"
|
58
345
|
|
59
|
-
|
346
|
+
## make sure paths exist
|
347
|
+
FileUtils.mkdir_p( File.dirname( path_html ) )
|
348
|
+
FileUtils.mkdir_p( File.dirname( path_json ) )
|
349
|
+
|
350
|
+
|
351
|
+
page = Factbook::Page.new( country_code )
|
60
352
|
|
61
353
|
## print first 600 chars
|
62
354
|
pp page.html[0..600]
|
63
355
|
|
64
356
|
## save for debugging
|
65
|
-
|
66
|
-
puts "saving a copy to #{
|
67
|
-
File.open(
|
357
|
+
|
358
|
+
puts "saving a copy to #{country_path}.html for debugging"
|
359
|
+
File.open( path_html, 'w') do |f|
|
68
360
|
f.write( page.html )
|
69
361
|
end
|
70
362
|
|
71
363
|
h = page.data
|
72
|
-
pp h
|
73
|
-
|
364
|
+
## pp h
|
365
|
+
|
74
366
|
### save to json
|
75
|
-
puts "saving a copy to #{
|
76
|
-
File.open(
|
367
|
+
puts "saving a copy to #{country_path}.json for debugging"
|
368
|
+
File.open( path_json, 'w') do |f|
|
77
369
|
f.write( JSON.pretty_generate( h ) )
|
78
370
|
end
|
79
371
|
end
|