factbook 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -4,11 +4,16 @@ README.md
4
4
  Rakefile
5
5
  lib/factbook.rb
6
6
  lib/factbook/page.rb
7
+ lib/factbook/sect.rb
7
8
  lib/factbook/version.rb
8
9
  test/data/countrytemplate_au.html
9
10
  test/data/countrytemplate_be.html
10
11
  test/data/countrytemplate_br.html
12
+ test/data/countrytemplate_ee.html
13
+ test/data/countrytemplate_ls.html
11
14
  test/data/countrytemplate_mx.html
15
+ test/data/countrytemplate_vt.html
16
+ test/data/countrytemplate_xx.html
12
17
  test/helper.rb
13
18
  test/test_json.rb
14
19
  test/test_page.rb
data/README.md CHANGED
@@ -21,8 +21,9 @@ offers free country profiles in the public domain (that is, no copyright(s), no
21
21
 
22
22
  ### Get country profile page as a hash (that is, structured data e.g. nested key/values)
23
23
 
24
- page = Factbook::Page.new( 'br' )
25
- pp page.data # pretty print hash
24
+ page = Factbook::Page.new( 'br' ) # br is the country code for Brazil
25
+ pp page.data # pretty print hash
26
+
26
27
 
27
28
  ### Save to disk as JSON
28
29
 
@@ -39,7 +40,9 @@ Just install the gem:
39
40
  $ gem install factbook
40
41
 
41
42
 
42
- ## Ready-To-Use Public Domain Datasets (Generated by `factbook`)
43
+ ## Ready-To-Use Public Domain Datasets
44
+
45
+ Datasets generated by `factbook` include:
43
46
 
44
47
  [openmundi/factbook.json](https://github.com/openmundi/factbook.json) - open (public domain)
45
48
  factbook country profiles in JSON for all the world's countries (using internet domain names
data/Rakefile CHANGED
@@ -32,17 +32,301 @@ Hoe.spec 'factbook' do
32
32
  end
33
33
 
34
34
 
35
+ =begin
36
+ # errors to fix:
37
+ saving a copy to europe/li-liechtenstein.html for debugging
38
+ found section 0 @ 38
39
+ found section 1 @ 1882
40
+ found section 2 @ 13160
41
+ found section 3 @ 29355
42
+ found section 4 @ 46010
43
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
44
+ found section 6 @ 64725
45
+
46
+ saving a copy to europe/mc-monaco.html for debugging
47
+ found section 0 @ 38
48
+ found section 1 @ 1446
49
+ found section 2 @ 12736
50
+ found section 3 @ 31192
51
+ found section 4 @ 47762
52
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
53
+
54
+ saving a copy to europe/sm-san-marino.html for debugging
55
+ found section 0 @ 38
56
+ found section 1 @ 1379
57
+ found section 2 @ 12243
58
+ found section 3 @ 27349
59
+ found section 4 @ 46949
60
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
61
+
62
+ saving a copy to europe/va-vatican-city.html for debugging
63
+ found section 0 @ 38
64
+ found section 1 @ 2000
65
+ found section 2 @ 13093
66
+ found section 3 @ 19912
67
+ found section 4 @ 37264
68
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
69
+ found section 6 @ 44353
70
+ *** error: section not found -- <div id="CollapsiblePanel1_Trans"
71
+
72
+ saving a copy to pacific/mh-marshall-islands.html for debugging
73
+ found section 0 @ 38
74
+ found section 1 @ 1414
75
+ found section 2 @ 13404
76
+ found section 3 @ 34854
77
+ found section 4 @ 52734
78
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
79
+
80
+ saving a copy to pacific/pw-palau.html for debugging
81
+ found section 0 @ 38
82
+ found section 1 @ 1338
83
+ found section 2 @ 12729
84
+ found section 3 @ 34145
85
+ found section 4 @ 51005
86
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
87
+
88
+ saving a copy to pacific/tv-tuvalu.html for debugging
89
+ found section 0 @ 38
90
+ found section 1 @ 1391
91
+ found section 2 @ 13580
92
+ found section 3 @ 33729
93
+ found section 4 @ 50390
94
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
95
+
96
+ saving a copy to africa/ss-south-sudan.html for debugging
97
+ found section 0 @ 38
98
+ found section 1 @ 2560
99
+ found section 2 @ 11342
100
+ found section 3 @ 26234
101
+ found section 4 @ 42271
102
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
103
+
104
+
105
+ =end
106
+
107
+
35
108
 
36
109
  desc 'generate json for factbook.json repo'
37
110
  task :genjson do
38
111
  require 'factbook'
39
112
 
40
113
  countries = [
41
- 'au',
42
- 'be',
43
- 'br',
44
- 'mx',
45
- 'us'
114
+ =begin
115
+ ['xx', 'world' ], ## special code for the world
116
+
117
+ ['ee', 'europe/eu-european-union'], ## special code for the european union
118
+ ['al', 'europe/al-albania' ],
119
+ ['an', 'europe/ad-andorra' ],
120
+ ['am', 'europe/am-armenia' ],
121
+ ['au', 'europe/at-austria' ],
122
+ ['aj', 'europe/az-azerbaijan' ],
123
+ ['bo', 'europe/by-belarus' ],
124
+ ['be', 'europe/be-belgium' ],
125
+ ['bk', 'europe/ba-bosnia-n-herzegovina' ],
126
+ ['bu', 'europe/bg-bulgaria' ],
127
+ ['hr', 'europe/hr-croatia' ],
128
+ ['cy', 'europe/cy-cyprus' ],
129
+ ['ez', 'europe/cz-czech-republic' ],
130
+ ['da', 'europe/dk-denmark' ],
131
+ ['en', 'europe/ee-estonia' ],
132
+ ['fi', 'europe/fi-finland' ],
133
+ ['fr', 'europe/fr-france' ],
134
+ ['gg', 'europe/ge-georgia' ],
135
+ ['gm', 'europe/de-germany' ],
136
+ ['uk', 'europe/gb-great-britain' ],
137
+ ['gr', 'europe/gr-greece' ],
138
+ ['hu', 'europe/hu-hungary' ],
139
+ ['ic', 'europe/is-iceland' ],
140
+ ['ei', 'europe/ie-ireland' ],
141
+ ['it', 'europe/it-italy' ],
142
+ ['lg', 'europe/lv-latvia' ],
143
+ # ['ls', 'europe/li-liechtenstein' ],
144
+ ['lh', 'europe/lt-lithuania' ],
145
+ ['lu', 'europe/lu-luxembourg' ],
146
+ ['mk', 'europe/mk-macedonia' ],
147
+ ['mt', 'europe/mt-malta' ],
148
+ ['md', 'europe/md-moldova' ],
149
+ # ['mn', 'europe/mc-monaco' ],
150
+ ['mj', 'europe/me-montenegro' ],
151
+ ['nl', 'europe/nl-netherlands' ],
152
+ ['no', 'europe/no-norway' ],
153
+ ['pl', 'europe/pl-poland' ],
154
+ ['po', 'europe/pt-portugal' ],
155
+ ['ro', 'europe/ro-romania' ],
156
+ ['rs', 'europe/ru-russia' ],
157
+ # ['sm', 'europe/sm-san-marino' ],
158
+ ['ri', 'europe/rs-serbia' ],
159
+ ['lo', 'europe/sk-slovakia' ],
160
+ ['si', 'europe/si-slovenia' ],
161
+ ['sp', 'europe/es-spain' ],
162
+ ['sw', 'europe/se-sweden' ],
163
+ ['sz', 'europe/ch-switzerland' ],
164
+ ['tu', 'europe/tr-turkey' ],
165
+ ['up', 'europe/ua-ukraine' ],
166
+ # ['vt', 'europe/va-vatican-city' ],
167
+
168
+ ['ca', 'north-america/ca-canada' ],
169
+ ['us', 'north-america/us-united-states' ],
170
+ ['mx', 'north-america/mx-mexico' ],
171
+
172
+ ['ac', 'caribbean/ag-antigua-n-barbuda' ],
173
+ ['bf', 'caribbean/bs-bahamas' ],
174
+ ['bb', 'caribbean/bb-barbados' ],
175
+ ['cu', 'caribbean/cu-cuba' ],
176
+ ['do', 'caribbean/dm-dominica' ],
177
+ ['dr', 'caribbean/do-dominican-republic' ],
178
+ ['gj', 'caribbean/gd-grenada' ],
179
+ ['ha', 'caribbean/ht-haiti' ],
180
+ ['jm', 'caribbean/jm-jamaica' ],
181
+ ['sc', 'caribbean/kn-saint-kitts-n-nevis' ],
182
+ ['st', 'caribbean/lc-saint-lucia' ],
183
+ ['vc', 'caribbean/vc-saint-vincent-n-the-grenadines' ],
184
+ ['td', 'caribbean/tt-trinidad-n-tobago' ],
185
+
186
+ ['bh', 'central-america/bz-belize' ],
187
+ ['cs', 'central-america/cr-costa-rica' ],
188
+ ['es', 'central-america/sv-el-salvador' ],
189
+ ['gt', 'central-america/gt-guatemala' ],
190
+ ['ho', 'central-america/hn-honduras' ],
191
+ ['nu', 'central-america/ni-nicaragua' ],
192
+ ['pm', 'central-america/pa-panama' ],
193
+
194
+ ['ar', 'south-america/ar-argentina' ],
195
+ ['bl', 'south-america/bo-bolivia' ],
196
+ ['br', 'south-america/br-brazil' ],
197
+ ['ci', 'south-america/cl-chile' ],
198
+ ['co', 'south-america/co-colombia' ],
199
+ ['ec', 'south-america/ec-ecuador' ],
200
+ ['gy', 'south-america/gy-guyana' ],
201
+ ['pa', 'south-america/py-paraguay' ],
202
+ ['pe', 'south-america/pe-peru' ],
203
+ ['ns', 'south-america/sr-suriname' ],
204
+ ['uy', 'south-america/uy-uruguay' ],
205
+ ['ve', 'south-america/ve-venezuela' ],
206
+
207
+ ['ag', 'africa/dz-algeria' ],
208
+ ['ao', 'africa/ao-angola' ],
209
+ ['bn', 'africa/bj-benin' ],
210
+ ['bc', 'africa/bw-botswana' ],
211
+ ['uv', 'africa/bf-burkina-faso' ],
212
+ ['by', 'africa/bi-burundi' ],
213
+ ['cm', 'africa/cm-cameroon' ],
214
+ ['cv', 'africa/cv-cape-verde' ],
215
+ ['ct', 'africa/cf-central-african-republic' ],
216
+ ['cd', 'africa/td-chad' ],
217
+ ['cn', 'africa/km-comoros' ],
218
+ ['cf', 'africa/cg-congo' ],
219
+ ['cg', 'africa/cd-congo-dr' ],
220
+ ['iv', 'africa/ci-cote-d-ivoire' ],
221
+ ['dj', 'africa/dj-djibouti' ],
222
+ ['eg', 'africa/eg-egypt' ],
223
+ ['ek', 'africa/gq-equatorial-guinea' ],
224
+ ['er', 'africa/er-eritrea' ],
225
+ ['et', 'africa/et-ethiopia' ],
226
+ ['gb', 'africa/ga-gabon' ],
227
+ ['ga', 'africa/gm-gambia' ],
228
+ ['gh', 'africa/gh-ghana' ],
229
+ ['gv', 'africa/gn-guinea' ],
230
+ ['pu', 'africa/gw-guinea-bissau' ],
231
+ ['ke', 'africa/ke-kenya' ],
232
+ ['lt', 'africa/ls-lesotho' ],
233
+ ['li', 'africa/lr-liberia' ],
234
+ ['ly', 'africa/ly-libya' ],
235
+ ['ma', 'africa/mg-madagascar' ],
236
+ ['mi', 'africa/mw-malawi' ],
237
+ ['ml', 'africa/ml-mali' ],
238
+ ['mr', 'africa/mr-mauritania' ],
239
+ ['mp', 'africa/mu-mauritius' ],
240
+ ['mo', 'africa/ma-morocco' ],
241
+ ['mz', 'africa/mz-mozambique' ],
242
+ ['wa', 'africa/na-namibia' ],
243
+ ['ng', 'africa/ne-niger' ],
244
+ ['ni', 'africa/ng-nigeria' ],
245
+ ['rw', 'africa/rw-rwanda' ],
246
+ ['tp', 'africa/st-st-sao-tome-n-principe' ],
247
+ ['sg', 'africa/sn-senegal' ],
248
+ ['se', 'africa/sc-seychelles' ],
249
+ ['sl', 'africa/sl-sierra-leone' ],
250
+ ['so', 'africa/so-somalia' ],
251
+ ['sf', 'africa/za-south-africa' ],
252
+ # ['od', 'africa/ss-south-sudan' ],
253
+ ['su', 'africa/sd-sudan' ],
254
+ ['wz', 'africa/sz-swaziland' ],
255
+ ['tz', 'africa/tz-tanzania' ],
256
+ ['to', 'africa/tg-togo' ],
257
+ ['ts', 'africa/tn-tunisia' ],
258
+ ['ug', 'africa/ug-uganda' ],
259
+ ['za', 'africa/zm-zambia' ],
260
+ ['zi', 'africa/zw-zimbabwe' ],
261
+
262
+ ['ba', 'middle-east/bh-bahrain' ],
263
+ ['ir', 'middle-east/ir-iran' ],
264
+ ['iz', 'middle-east/iq-iraq' ],
265
+ ['is', 'middle-east/il-israel' ],
266
+ ['jo', 'middle-east/jo-jordan' ],
267
+ ['ku', 'middle-east/kw-kuwait' ],
268
+ ['le', 'middle-east/lb-lebanon' ],
269
+ ['mu', 'middle-east/om-oman' ],
270
+ ### ['??', 'middle-east/ps-palestine' ], -- incl. gaza strip n west bank
271
+ ['qa', 'middle-east/qa-qatar' ],
272
+ ['sa', 'middle-east/sa-saudi-arabia' ],
273
+ ['sy', 'middle-east/sy-syria' ],
274
+ ['ae', 'middle-east/ae-united-arab-emirates' ],
275
+ ['ym', 'middle-east/ye-yemen' ],
276
+
277
+ ['af', 'asia/af-afghanistan' ],
278
+ ['bg', 'asia/bd-bangladesh' ],
279
+ ['bt', 'asia/bt-bhutan' ],
280
+ ['bx', 'asia/bn-brunei' ],
281
+ ['cb', 'asia/kh-cambodia' ],
282
+ ['ch', 'asia/cn-china' ],
283
+ ['in', 'asia/in-india' ],
284
+ ['id', 'asia/id-indonesia' ],
285
+ ['ja', 'asia/jp-japan' ],
286
+ ['kz', 'asia/kz-kazakhstan' ],
287
+ ['kg', 'asia/kg-kyrgyzstan' ],
288
+ ['la', 'asia/la-laos' ],
289
+ ['my', 'asia/my-malaysia' ],
290
+ ['mv', 'asia/mv-maldives' ],
291
+ ['mg', 'asia/mn-mongolia' ],
292
+ ['bm', 'asia/mm-myanmar' ], ## still using Burma
293
+ ['np', 'asia/np-nepal' ],
294
+ ['kn', 'asia/kp-north-korea' ],
295
+ ['pk', 'asia/pk-pakistan' ],
296
+ ['rp', 'asia/ph-philippines' ],
297
+ ['sn', 'asia/sg-singapore' ],
298
+ ['ks', 'asia/kr-south-korea' ],
299
+ ['ce', 'asia/lk-sri-lanka' ],
300
+ ['tw', 'asia/tw-taiwan' ],
301
+ ['ti', 'asia/tj-tajikistan' ],
302
+ ['th', 'asia/th-thailand' ],
303
+ ['tt', 'asia/tl-timor-leste' ],
304
+ ['tx', 'asia/tm-turkmenistan' ],
305
+ ['uz', 'asia/uz-uzbekistan' ],
306
+ ['vm', 'asia/vn-vietnam' ],
307
+
308
+ ['as', 'pacific/au-australia' ],
309
+ ['fj', 'pacific/fj-fiji' ],
310
+ ['kr', 'pacific/ki-kiribati' ],
311
+ # ['rm', 'pacific/mh-marshall-islands' ],
312
+ ['fm', 'pacific/fm-micronesia' ],
313
+ ['nr', 'pacific/nr-nauru' ],
314
+ ['nz', 'pacific/nz-new-zealand' ],
315
+ # ['ps', 'pacific/pw-palau' ],
316
+ ['pp', 'pacific/pg-papua-new-guinea' ],
317
+ ['ws', 'pacific/ws-samoa' ],
318
+ ['bp', 'pacific/sb-solomon-islands' ],
319
+ ['tn', 'pacific/to-tonga' ],
320
+ # ['tv', 'pacific/tv-tuvalu' ],
321
+ ['nh', 'pacific/vu-vanuatu' ],
322
+
323
+ =end
324
+
325
+
326
+
327
+ =begin
328
+ ['', 'africa/' ],
329
+ =end
46
330
  ]
47
331
 
48
332
  countries.each do |country|
@@ -51,29 +335,37 @@ task :genjson do
51
335
  end
52
336
 
53
337
 
54
- def gen_json_for( code )
55
- Dir.mkdir( 'tmp' ) unless Dir.exists?( 'tmp' )
56
- Dir.mkdir( 'tmp/html' ) unless Dir.exists?( 'tmp/html' )
57
- Dir.mkdir( 'tmp/json' ) unless Dir.exists?( 'tmp/json' )
338
+ def gen_json_for( country )
339
+
340
+ country_code = country[0]
341
+ country_path = country[1]
342
+
343
+ path_html = "tmp/html/#{country_path}.html"
344
+ path_json = "tmp/json/#{country_path}.json"
58
345
 
59
- page = Factbook::Page.new( code )
346
+ ## make sure path exists
347
+ FileUtils.mkdir_p( File.dirname( path_html ) )
348
+ FileUtils.mkdir_p( File.dirname( path_json ) )
349
+
350
+
351
+ page = Factbook::Page.new( country_code )
60
352
 
61
353
  ## print first 600 chars
62
354
  pp page.html[0..600]
63
355
 
64
356
  ## save for debugging
65
-
66
- puts "saving a copy to #{code}.html for debugging"
67
- File.open( "tmp/html/#{code}.html", 'w') do |f|
357
+
358
+ puts "saving a copy to #{country_path}.html for debugging"
359
+ File.open( path_html, 'w') do |f|
68
360
  f.write( page.html )
69
361
  end
70
362
 
71
363
  h = page.data
72
- pp h
73
-
364
+ ## pp h
365
+
74
366
  ### save to json
75
- puts "saving a copy to #{code}.json for debugging"
76
- File.open( "tmp/json/#{code}.json", 'w') do |f|
367
+ puts "saving a copy to #{country_path}.json for debugging"
368
+ File.open( path_json, 'w') do |f|
77
369
  f.write( JSON.pretty_generate( h ) )
78
370
  end
79
371
  end