factbook 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +5 -0
- data/README.md +6 -3
- data/Rakefile +309 -17
- data/lib/factbook/page.rb +58 -201
- data/lib/factbook/sect.rb +179 -0
- data/lib/factbook/version.rb +1 -1
- data/lib/factbook.rb +2 -0
- data/test/data/countrytemplate_ee.html +2999 -0
- data/test/data/countrytemplate_ls.html +2728 -0
- data/test/data/countrytemplate_vt.html +1726 -0
- data/test/data/countrytemplate_xx.html +2898 -0
- data/test/test_json.rb +31 -29
- data/test/test_page.rb +18 -209
- data/test/test_page_old.rb +191 -3
- metadata +17 -12
data/Manifest.txt
CHANGED
@@ -4,11 +4,16 @@ README.md
|
|
4
4
|
Rakefile
|
5
5
|
lib/factbook.rb
|
6
6
|
lib/factbook/page.rb
|
7
|
+
lib/factbook/sect.rb
|
7
8
|
lib/factbook/version.rb
|
8
9
|
test/data/countrytemplate_au.html
|
9
10
|
test/data/countrytemplate_be.html
|
10
11
|
test/data/countrytemplate_br.html
|
12
|
+
test/data/countrytemplate_ee.html
|
13
|
+
test/data/countrytemplate_ls.html
|
11
14
|
test/data/countrytemplate_mx.html
|
15
|
+
test/data/countrytemplate_vt.html
|
16
|
+
test/data/countrytemplate_xx.html
|
12
17
|
test/helper.rb
|
13
18
|
test/test_json.rb
|
14
19
|
test/test_page.rb
|
data/README.md
CHANGED
@@ -21,8 +21,9 @@ offers free country profiles in the public domain (that is, no copyright(s), no
|
|
21
21
|
|
22
22
|
### Get country profile page as a hash (that is, structured data e.g. nested key/values)
|
23
23
|
|
24
|
-
page = Factbook::Page.new( 'br' )
|
25
|
-
pp page.data
|
24
|
+
page = Factbook::Page.new( 'br' ) # br is the country code for Brazil
|
25
|
+
pp page.data # pretty print hash
|
26
|
+
|
26
27
|
|
27
28
|
### Save to disk as JSON
|
28
29
|
|
@@ -39,7 +40,9 @@ Just install the gem:
|
|
39
40
|
$ gem install factbook
|
40
41
|
|
41
42
|
|
42
|
-
## Ready-To-Use Public Domain Datasets
|
43
|
+
## Ready-To-Use Public Domain Datasets
|
44
|
+
|
45
|
+
Datasets generated by `factbook` include:
|
43
46
|
|
44
47
|
[openmundi/factbook.json](https://github.com/openmundi/factbook.json) - open (public domain)
|
45
48
|
factbook country profiles in JSON for all the world's countries (using internet domain names
|
data/Rakefile
CHANGED
@@ -32,17 +32,301 @@ Hoe.spec 'factbook' do
|
|
32
32
|
end
|
33
33
|
|
34
34
|
|
35
|
+
=begin
|
36
|
+
# errors to fix:
|
37
|
+
saving a copy to europe/li-liechtenstein.html for debugging
|
38
|
+
found section 0 @ 38
|
39
|
+
found section 1 @ 1882
|
40
|
+
found section 2 @ 13160
|
41
|
+
found section 3 @ 29355
|
42
|
+
found section 4 @ 46010
|
43
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
44
|
+
found section 6 @ 64725
|
45
|
+
|
46
|
+
saving a copy to europe/mc-monaco.html for debugging
|
47
|
+
found section 0 @ 38
|
48
|
+
found section 1 @ 1446
|
49
|
+
found section 2 @ 12736
|
50
|
+
found section 3 @ 31192
|
51
|
+
found section 4 @ 47762
|
52
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
53
|
+
|
54
|
+
saving a copy to europe/sm-san-marino.html for debugging
|
55
|
+
found section 0 @ 38
|
56
|
+
found section 1 @ 1379
|
57
|
+
found section 2 @ 12243
|
58
|
+
found section 3 @ 27349
|
59
|
+
found section 4 @ 46949
|
60
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
61
|
+
|
62
|
+
saving a copy to europe/va-vatican-city.html for debugging
|
63
|
+
found section 0 @ 38
|
64
|
+
found section 1 @ 2000
|
65
|
+
found section 2 @ 13093
|
66
|
+
found section 3 @ 19912
|
67
|
+
found section 4 @ 37264
|
68
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
69
|
+
found section 6 @ 44353
|
70
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Trans"
|
71
|
+
|
72
|
+
saving a copy to pacific/mh-marshall-islands.html for debugging
|
73
|
+
found section 0 @ 38
|
74
|
+
found section 1 @ 1414
|
75
|
+
found section 2 @ 13404
|
76
|
+
found section 3 @ 34854
|
77
|
+
found section 4 @ 52734
|
78
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
79
|
+
|
80
|
+
saving a copy to pacific/pw-palau.html for debugging
|
81
|
+
found section 0 @ 38
|
82
|
+
found section 1 @ 1338
|
83
|
+
found section 2 @ 12729
|
84
|
+
found section 3 @ 34145
|
85
|
+
found section 4 @ 51005
|
86
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
87
|
+
|
88
|
+
saving a copy to pacific/tv-tuvalu.html for debugging
|
89
|
+
found section 0 @ 38
|
90
|
+
found section 1 @ 1391
|
91
|
+
found section 2 @ 13580
|
92
|
+
found section 3 @ 33729
|
93
|
+
found section 4 @ 50390
|
94
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
95
|
+
|
96
|
+
saving a copy to africa/ss-south-sudan.html for debugging
|
97
|
+
found section 0 @ 38
|
98
|
+
found section 1 @ 2560
|
99
|
+
found section 2 @ 11342
|
100
|
+
found section 3 @ 26234
|
101
|
+
found section 4 @ 42271
|
102
|
+
*** error: section not found -- <div id="CollapsiblePanel1_Energy"
|
103
|
+
|
104
|
+
|
105
|
+
=end
|
106
|
+
|
107
|
+
|
35
108
|
|
36
109
|
desc 'generate json for factbook.json repo'
|
37
110
|
task :genjson do
|
38
111
|
require 'factbook'
|
39
112
|
|
40
113
|
countries = [
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
114
|
+
=begin
|
115
|
+
['xx', 'world' ], ## special code for the world
|
116
|
+
|
117
|
+
['ee', 'europe/eu-european-union'], ## special code for the european union
|
118
|
+
['al', 'europe/al-albania' ],
|
119
|
+
['an', 'europe/ad-andorra' ],
|
120
|
+
['am', 'europe/am-armenia' ],
|
121
|
+
['au', 'europe/at-austria' ],
|
122
|
+
['aj', 'europe/az-azerbaijan' ],
|
123
|
+
['bo', 'europe/by-belarus' ],
|
124
|
+
['be', 'europe/be-belgium' ],
|
125
|
+
['bk', 'europe/ba-bosnia-n-herzegovina' ],
|
126
|
+
['bu', 'europe/bg-bulgaria' ],
|
127
|
+
['hr', 'europe/hr-croatia' ],
|
128
|
+
['cy', 'europe/cy-cyprus' ],
|
129
|
+
['ez', 'europe/cz-czech-republic' ],
|
130
|
+
['da', 'europe/dk-denmark' ],
|
131
|
+
['en', 'europe/ee-estonia' ],
|
132
|
+
['fi', 'europe/fi-finland' ],
|
133
|
+
['fr', 'europe/fr-france' ],
|
134
|
+
['gg', 'europe/ge-georgia' ],
|
135
|
+
['gm', 'europe/de-germany' ],
|
136
|
+
['uk', 'europe/gb-great-britain' ],
|
137
|
+
['gr', 'europe/gr-greece' ],
|
138
|
+
['hu', 'europe/hu-hungary' ],
|
139
|
+
['ic', 'europe/is-iceland' ],
|
140
|
+
['ei', 'europe/ie-ireland' ],
|
141
|
+
['it', 'europe/it-italy' ],
|
142
|
+
['lg', 'europe/lv-latvia' ],
|
143
|
+
# ['ls', 'europe/li-liechtenstein' ],
|
144
|
+
['lh', 'europe/lt-lithuania' ],
|
145
|
+
['lu', 'europe/lu-luxembourg' ],
|
146
|
+
['mk', 'europe/mk-macedonia' ],
|
147
|
+
['mt', 'europe/mt-malta' ],
|
148
|
+
['md', 'europe/md-moldova' ],
|
149
|
+
# ['mn', 'europe/mc-monaco' ],
|
150
|
+
['mj', 'europe/me-montenegro' ],
|
151
|
+
['nl', 'europe/nl-netherlands' ],
|
152
|
+
['no', 'europe/no-norway' ],
|
153
|
+
['pl', 'europe/pl-poland' ],
|
154
|
+
['po', 'europe/pt-portugal' ],
|
155
|
+
['ro', 'europe/ro-romania' ],
|
156
|
+
['rs', 'europe/ru-russia' ],
|
157
|
+
# ['sm', 'europe/sm-san-marino' ],
|
158
|
+
['ri', 'europe/rs-serbia' ],
|
159
|
+
['lo', 'europe/sk-slovakia' ],
|
160
|
+
['si', 'europe/si-slovenia' ],
|
161
|
+
['sp', 'europe/es-spain' ],
|
162
|
+
['sw', 'europe/se-sweden' ],
|
163
|
+
['sz', 'europe/ch-switzerland' ],
|
164
|
+
['tu', 'europe/tr-turkey' ],
|
165
|
+
['up', 'europe/ua-ukraine' ],
|
166
|
+
# ['vt', 'europe/va-vatican-city' ],
|
167
|
+
|
168
|
+
['ca', 'north-america/ca-canada' ],
|
169
|
+
['us', 'north-america/us-united-states' ],
|
170
|
+
['mx', 'north-america/mx-mexico' ],
|
171
|
+
|
172
|
+
['ac', 'caribbean/ag-antigua-n-barbuda' ],
|
173
|
+
['bf', 'caribbean/bs-bahamas' ],
|
174
|
+
['bb', 'caribbean/bb-barbados' ],
|
175
|
+
['cu', 'caribbean/cu-cuba' ],
|
176
|
+
['do', 'caribbean/dm-dominica' ],
|
177
|
+
['dr', 'caribbean/do-dominican-republic' ],
|
178
|
+
['gj', 'caribbean/gd-grenada' ],
|
179
|
+
['ha', 'caribbean/ht-haiti' ],
|
180
|
+
['jm', 'caribbean/jm-jamaica' ],
|
181
|
+
['sc', 'caribbean/kn-saint-kitts-n-nevis' ],
|
182
|
+
['st', 'caribbean/lc-saint-lucia' ],
|
183
|
+
['vc', 'caribbean/vc-saint-vincent-n-the-grenadines' ],
|
184
|
+
['td', 'caribbean/tt-trinidad-n-tobago' ],
|
185
|
+
|
186
|
+
['bh', 'central-america/bz-belize' ],
|
187
|
+
['cs', 'central-america/cr-costa-rica' ],
|
188
|
+
['es', 'central-america/sv-el-salvador' ],
|
189
|
+
['gt', 'central-america/gt-guatemala' ],
|
190
|
+
['ho', 'central-america/hn-honduras' ],
|
191
|
+
['nu', 'central-america/ni-nicaragua' ],
|
192
|
+
['pm', 'central-america/pa-panama' ],
|
193
|
+
|
194
|
+
['ar', 'south-america/ar-argentina' ],
|
195
|
+
['bl', 'south-america/bo-bolivia' ],
|
196
|
+
['br', 'south-america/br-brazil' ],
|
197
|
+
['ci', 'south-america/cl-chile' ],
|
198
|
+
['co', 'south-america/co-colombia' ],
|
199
|
+
['ec', 'south-america/ec-ecuador' ],
|
200
|
+
['gy', 'south-america/gy-guyana' ],
|
201
|
+
['pa', 'south-america/py-paraguay' ],
|
202
|
+
['pe', 'south-america/pe-peru' ],
|
203
|
+
['ns', 'south-america/sr-suriname' ],
|
204
|
+
['uy', 'south-america/uy-uruguay' ],
|
205
|
+
['ve', 'south-america/ve-venezuela' ],
|
206
|
+
|
207
|
+
['ag', 'africa/dz-algeria' ],
|
208
|
+
['ao', 'africa/ao-angola' ],
|
209
|
+
['bn', 'africa/bj-benin' ],
|
210
|
+
['bc', 'africa/bw-botswana' ],
|
211
|
+
['uv', 'africa/bf-burkina-faso' ],
|
212
|
+
['by', 'africa/bi-burundi' ],
|
213
|
+
['cm', 'africa/cm-cameroon' ],
|
214
|
+
['cv', 'africa/cv-cape-verde' ],
|
215
|
+
['ct', 'africa/cf-central-african-republic' ],
|
216
|
+
['cd', 'africa/td-chad' ],
|
217
|
+
['cn', 'africa/km-comoros' ],
|
218
|
+
['cf', 'africa/cg-congo' ],
|
219
|
+
['cg', 'africa/cd-congo-dr' ],
|
220
|
+
['iv', 'africa/ci-cote-d-ivoire' ],
|
221
|
+
['dj', 'africa/dj-djibouti' ],
|
222
|
+
['eg', 'africa/eg-egypt' ],
|
223
|
+
['ek', 'africa/gq-equatorial-guinea' ],
|
224
|
+
['er', 'africa/er-eritrea' ],
|
225
|
+
['et', 'africa/et-ethiopia' ],
|
226
|
+
['gb', 'africa/ga-gabon' ],
|
227
|
+
['ga', 'africa/gm-gambia' ],
|
228
|
+
['gh', 'africa/gh-ghana' ],
|
229
|
+
['gv', 'africa/gn-guinea' ],
|
230
|
+
['pu', 'africa/gw-guinea-bissau' ],
|
231
|
+
['ke', 'africa/ke-kenya' ],
|
232
|
+
['lt', 'africa/ls-lesotho' ],
|
233
|
+
['li', 'africa/lr-liberia' ],
|
234
|
+
['ly', 'africa/ly-libya' ],
|
235
|
+
['ma', 'africa/mg-madagascar' ],
|
236
|
+
['mi', 'africa/mw-malawi' ],
|
237
|
+
['ml', 'africa/ml-mali' ],
|
238
|
+
['mr', 'africa/mr-mauritania' ],
|
239
|
+
['mp', 'africa/mu-mauritius' ],
|
240
|
+
['mo', 'africa/ma-morocco' ],
|
241
|
+
['mz', 'africa/mz-mozambique' ],
|
242
|
+
['wa', 'africa/na-namibia' ],
|
243
|
+
['ng', 'africa/ne-niger' ],
|
244
|
+
['ni', 'africa/ng-nigeria' ],
|
245
|
+
['rw', 'africa/rw-rwanda' ],
|
246
|
+
['tp', 'africa/st-st-sao-tome-n-principe' ],
|
247
|
+
['sg', 'africa/sn-senegal' ],
|
248
|
+
['se', 'africa/sc-seychelles' ],
|
249
|
+
['sl', 'africa/sl-sierra-leone' ],
|
250
|
+
['so', 'africa/so-somalia' ],
|
251
|
+
['sf', 'africa/za-south-africa' ],
|
252
|
+
# ['od', 'africa/ss-south-sudan' ],
|
253
|
+
['su', 'africa/sd-sudan' ],
|
254
|
+
['wz', 'africa/sz-swaziland' ],
|
255
|
+
['tz', 'africa/tz-tanzania' ],
|
256
|
+
['to', 'africa/tg-togo' ],
|
257
|
+
['ts', 'africa/tn-tunisia' ],
|
258
|
+
['ug', 'africa/ug-uganda' ],
|
259
|
+
['za', 'africa/zm-zambia' ],
|
260
|
+
['zi', 'africa/zw-zimbabwe' ],
|
261
|
+
|
262
|
+
['ba', 'middle-east/bh-bahrain' ],
|
263
|
+
['ir', 'middle-east/ir-iran' ],
|
264
|
+
['iz', 'middle-east/iq-iraq' ],
|
265
|
+
['is', 'middle-east/il-israel' ],
|
266
|
+
['jo', 'middle-east/jo-jordan' ],
|
267
|
+
['ku', 'middle-east/kw-kuwait' ],
|
268
|
+
['le', 'middle-east/lb-lebanon' ],
|
269
|
+
['mu', 'middle-east/om-oman' ],
|
270
|
+
### ['??', 'middle-east/ps-palestine' ], -- incl. gaza strip n west bank
|
271
|
+
['qa', 'middle-east/qa-qatar' ],
|
272
|
+
['sa', 'middle-east/sa-saudi-arabia' ],
|
273
|
+
['sy', 'middle-east/sy-syria' ],
|
274
|
+
['ae', 'middle-east/ae-united-arab-emirates' ],
|
275
|
+
['ym', 'middle-east/ye-yemen' ],
|
276
|
+
|
277
|
+
['af', 'asia/af-afghanistan' ],
|
278
|
+
['bg', 'asia/bd-bangladesh' ],
|
279
|
+
['bt', 'asia/bt-bhutan' ],
|
280
|
+
['bx', 'asia/bn-brunei' ],
|
281
|
+
['cb', 'asia/kh-cambodia' ],
|
282
|
+
['ch', 'asia/cn-china' ],
|
283
|
+
['in', 'asia/in-india' ],
|
284
|
+
['id', 'asia/id-indonesia' ],
|
285
|
+
['ja', 'asia/jp-japan' ],
|
286
|
+
['kz', 'asia/kz-kazakhstan' ],
|
287
|
+
['kg', 'asia/kg-kyrgyzstan' ],
|
288
|
+
['la', 'asia/la-laos' ],
|
289
|
+
['my', 'asia/my-malaysia' ],
|
290
|
+
['mv', 'asia/mv-maldives' ],
|
291
|
+
['mg', 'asia/mn-mongolia' ],
|
292
|
+
['bm', 'asia/mm-myanmar' ], ## still using Burma
|
293
|
+
['np', 'asia/np-nepal' ],
|
294
|
+
['kn', 'asia/kp-north-korea' ],
|
295
|
+
['pk', 'asia/pk-pakistan' ],
|
296
|
+
['rp', 'asia/ph-philippines' ],
|
297
|
+
['sn', 'asia/sg-singapore' ],
|
298
|
+
['ks', 'asia/kr-south-korea' ],
|
299
|
+
['ce', 'asia/lk-sri-lanka' ],
|
300
|
+
['tw', 'asia/tw-taiwan' ],
|
301
|
+
['ti', 'asia/tj-tajikistan' ],
|
302
|
+
['th', 'asia/th-thailand' ],
|
303
|
+
['tt', 'asia/tl-timor-leste' ],
|
304
|
+
['tx', 'asia/tm-turkmenistan' ],
|
305
|
+
['uz', 'asia/uz-uzbekistan' ],
|
306
|
+
['vm', 'asia/vn-vietnam' ],
|
307
|
+
|
308
|
+
['as', 'pacific/au-australia' ],
|
309
|
+
['fj', 'pacific/fj-fiji' ],
|
310
|
+
['kr', 'pacific/ki-kiribati' ],
|
311
|
+
# ['rm', 'pacific/mh-marshall-islands' ],
|
312
|
+
['fm', 'pacific/fm-micronesia' ],
|
313
|
+
['nr', 'pacific/nr-nauru' ],
|
314
|
+
['nz', 'pacific/nz-new-zealand' ],
|
315
|
+
# ['ps', 'pacific/pw-palau' ],
|
316
|
+
['pp', 'pacific/pg-papua-new-guinea' ],
|
317
|
+
['ws', 'pacific/ws-samoa' ],
|
318
|
+
['bp', 'pacific/sb-solomon-islands' ],
|
319
|
+
['tn', 'pacific/to-tonga' ],
|
320
|
+
# ['tv', 'pacific/tv-tuvalu' ],
|
321
|
+
['nh', 'pacific/vu-vanuatu' ],
|
322
|
+
|
323
|
+
=end
|
324
|
+
|
325
|
+
|
326
|
+
|
327
|
+
=begin
|
328
|
+
['', 'africa/' ],
|
329
|
+
=end
|
46
330
|
]
|
47
331
|
|
48
332
|
countries.each do |country|
|
@@ -51,29 +335,37 @@ task :genjson do
|
|
51
335
|
end
|
52
336
|
|
53
337
|
|
54
|
-
def gen_json_for(
|
55
|
-
|
56
|
-
|
57
|
-
|
338
|
+
def gen_json_for( country )
|
339
|
+
|
340
|
+
country_code = country[0]
|
341
|
+
country_path = country[1]
|
342
|
+
|
343
|
+
path_html = "tmp/html/#{country_path}.html"
|
344
|
+
path_json = "tmp/json/#{country_path}.json"
|
58
345
|
|
59
|
-
|
346
|
+
## make sure paths exist
|
347
|
+
FileUtils.mkdir_p( File.dirname( path_html ) )
|
348
|
+
FileUtils.mkdir_p( File.dirname( path_json ) )
|
349
|
+
|
350
|
+
|
351
|
+
page = Factbook::Page.new( country_code )
|
60
352
|
|
61
353
|
## print first 600 chars
|
62
354
|
pp page.html[0..600]
|
63
355
|
|
64
356
|
## save for debugging
|
65
|
-
|
66
|
-
puts "saving a copy to #{
|
67
|
-
File.open(
|
357
|
+
|
358
|
+
puts "saving a copy to #{country_path}.html for debugging"
|
359
|
+
File.open( path_html, 'w') do |f|
|
68
360
|
f.write( page.html )
|
69
361
|
end
|
70
362
|
|
71
363
|
h = page.data
|
72
|
-
pp h
|
73
|
-
|
364
|
+
## pp h
|
365
|
+
|
74
366
|
### save to json
|
75
|
-
puts "saving a copy to #{
|
76
|
-
File.open(
|
367
|
+
puts "saving a copy to #{country_path}.json for debugging"
|
368
|
+
File.open( path_json, 'w') do |f|
|
77
369
|
f.write( JSON.pretty_generate( h ) )
|
78
370
|
end
|
79
371
|
end
|