factbook 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -4,11 +4,16 @@ README.md
4
4
  Rakefile
5
5
  lib/factbook.rb
6
6
  lib/factbook/page.rb
7
+ lib/factbook/sect.rb
7
8
  lib/factbook/version.rb
8
9
  test/data/countrytemplate_au.html
9
10
  test/data/countrytemplate_be.html
10
11
  test/data/countrytemplate_br.html
12
+ test/data/countrytemplate_ee.html
13
+ test/data/countrytemplate_ls.html
11
14
  test/data/countrytemplate_mx.html
15
+ test/data/countrytemplate_vt.html
16
+ test/data/countrytemplate_xx.html
12
17
  test/helper.rb
13
18
  test/test_json.rb
14
19
  test/test_page.rb
data/README.md CHANGED
@@ -21,8 +21,9 @@ offers free country profiles in the public domain (that is, no copyright(s), no
21
21
 
22
22
  ### Get country profile page as a hash (that is, structured data e.g. nested key/values)
23
23
 
24
- page = Factbook::Page.new( 'br' )
25
- pp page.data # pretty print hash
24
+ page = Factbook::Page.new( 'br' ) # br is the country code for Brazil
25
+ pp page.data # pretty print hash
26
+
26
27
 
27
28
  ### Save to disk as JSON
28
29
 
@@ -39,7 +40,9 @@ Just install the gem:
39
40
  $ gem install factbook
40
41
 
41
42
 
42
- ## Ready-To-Use Public Domain Datasets (Generated by `factbook`)
43
+ ## Ready-To-Use Public Domain Datasets
44
+
45
+ Datasets generated by `factbook` include:
43
46
 
44
47
  [openmundi/factbook.json](https://github.com/openmundi/factbook.json) - open (public domain)
45
48
  factbook country profiles in JSON for all the world's countries (using internet domain names
data/Rakefile CHANGED
@@ -32,17 +32,301 @@ Hoe.spec 'factbook' do
32
32
  end
33
33
 
34
34
 
35
+ =begin
36
+ # errors to fix:
37
+ saving a copy to europe/li-liechtenstein.html for debugging
38
+ found section 0 @ 38
39
+ found section 1 @ 1882
40
+ found section 2 @ 13160
41
+ found section 3 @ 29355
42
+ found section 4 @ 46010
43
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
44
+ found section 6 @ 64725
45
+
46
+ saving a copy to europe/mc-monaco.html for debugging
47
+ found section 0 @ 38
48
+ found section 1 @ 1446
49
+ found section 2 @ 12736
50
+ found section 3 @ 31192
51
+ found section 4 @ 47762
52
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
53
+
54
+ saving a copy to europe/sm-san-marino.html for debugging
55
+ found section 0 @ 38
56
+ found section 1 @ 1379
57
+ found section 2 @ 12243
58
+ found section 3 @ 27349
59
+ found section 4 @ 46949
60
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
61
+
62
+ saving a copy to europe/va-vatican-city.html for debugging
63
+ found section 0 @ 38
64
+ found section 1 @ 2000
65
+ found section 2 @ 13093
66
+ found section 3 @ 19912
67
+ found section 4 @ 37264
68
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
69
+ found section 6 @ 44353
70
+ *** error: section not found -- <div id="CollapsiblePanel1_Trans"
71
+
72
+ saving a copy to pacific/mh-marshall-islands.html for debugging
73
+ found section 0 @ 38
74
+ found section 1 @ 1414
75
+ found section 2 @ 13404
76
+ found section 3 @ 34854
77
+ found section 4 @ 52734
78
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
79
+
80
+ saving a copy to pacific/pw-palau.html for debugging
81
+ found section 0 @ 38
82
+ found section 1 @ 1338
83
+ found section 2 @ 12729
84
+ found section 3 @ 34145
85
+ found section 4 @ 51005
86
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
87
+
88
+ saving a copy to pacific/tv-tuvalu.html for debugging
89
+ found section 0 @ 38
90
+ found section 1 @ 1391
91
+ found section 2 @ 13580
92
+ found section 3 @ 33729
93
+ found section 4 @ 50390
94
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
95
+
96
+ saving a copy to africa/ss-south-sudan.html for debugging
97
+ found section 0 @ 38
98
+ found section 1 @ 2560
99
+ found section 2 @ 11342
100
+ found section 3 @ 26234
101
+ found section 4 @ 42271
102
+ *** error: section not found -- <div id="CollapsiblePanel1_Energy"
103
+
104
+
105
+ =end
106
+
107
+
35
108
 
36
109
  desc 'generate json for factbook.json repo'
37
110
  task :genjson do
38
111
  require 'factbook'
39
112
 
40
113
  countries = [
41
- 'au',
42
- 'be',
43
- 'br',
44
- 'mx',
45
- 'us'
114
+ =begin
115
+ ['xx', 'world' ], ## special code for the world
116
+
117
+ ['ee', 'europe/eu-european-union'], ## special code for the european union
118
+ ['al', 'europe/al-albania' ],
119
+ ['an', 'europe/ad-andorra' ],
120
+ ['am', 'europe/am-armenia' ],
121
+ ['au', 'europe/at-austria' ],
122
+ ['aj', 'europe/az-azerbaijan' ],
123
+ ['bo', 'europe/by-belarus' ],
124
+ ['be', 'europe/be-belgium' ],
125
+ ['bk', 'europe/ba-bosnia-n-herzegovina' ],
126
+ ['bu', 'europe/bg-bulgaria' ],
127
+ ['hr', 'europe/hr-croatia' ],
128
+ ['cy', 'europe/cy-cyprus' ],
129
+ ['ez', 'europe/cz-czech-republic' ],
130
+ ['da', 'europe/dk-denmark' ],
131
+ ['en', 'europe/ee-estonia' ],
132
+ ['fi', 'europe/fi-finland' ],
133
+ ['fr', 'europe/fr-france' ],
134
+ ['gg', 'europe/ge-georgia' ],
135
+ ['gm', 'europe/de-germany' ],
136
+ ['uk', 'europe/gb-great-britain' ],
137
+ ['gr', 'europe/gr-greece' ],
138
+ ['hu', 'europe/hu-hungary' ],
139
+ ['ic', 'europe/is-iceland' ],
140
+ ['ei', 'europe/ie-ireland' ],
141
+ ['it', 'europe/it-italy' ],
142
+ ['lg', 'europe/lv-latvia' ],
143
+ # ['ls', 'europe/li-liechtenstein' ],
144
+ ['lh', 'europe/lt-lithuania' ],
145
+ ['lu', 'europe/lu-luxembourg' ],
146
+ ['mk', 'europe/mk-macedonia' ],
147
+ ['mt', 'europe/mt-malta' ],
148
+ ['md', 'europe/md-moldova' ],
149
+ # ['mn', 'europe/mc-monaco' ],
150
+ ['mj', 'europe/me-montenegro' ],
151
+ ['nl', 'europe/nl-netherlands' ],
152
+ ['no', 'europe/no-norway' ],
153
+ ['pl', 'europe/pl-poland' ],
154
+ ['po', 'europe/pt-portugal' ],
155
+ ['ro', 'europe/ro-romania' ],
156
+ ['rs', 'europe/ru-russia' ],
157
+ # ['sm', 'europe/sm-san-marino' ],
158
+ ['ri', 'europe/rs-serbia' ],
159
+ ['lo', 'europe/sk-slovakia' ],
160
+ ['si', 'europe/si-slovenia' ],
161
+ ['sp', 'europe/es-spain' ],
162
+ ['sw', 'europe/se-sweden' ],
163
+ ['sz', 'europe/ch-switzerland' ],
164
+ ['tu', 'europe/tr-turkey' ],
165
+ ['up', 'europe/ua-ukraine' ],
166
+ # ['vt', 'europe/va-vatican-city' ],
167
+
168
+ ['ca', 'north-america/ca-canada' ],
169
+ ['us', 'north-america/us-united-states' ],
170
+ ['mx', 'north-america/mx-mexico' ],
171
+
172
+ ['ac', 'caribbean/ag-antigua-n-barbuda' ],
173
+ ['bf', 'caribbean/bs-bahamas' ],
174
+ ['bb', 'caribbean/bb-barbados' ],
175
+ ['cu', 'caribbean/cu-cuba' ],
176
+ ['do', 'caribbean/dm-dominica' ],
177
+ ['dr', 'caribbean/do-dominican-republic' ],
178
+ ['gj', 'caribbean/gd-grenada' ],
179
+ ['ha', 'caribbean/ht-haiti' ],
180
+ ['jm', 'caribbean/jm-jamaica' ],
181
+ ['sc', 'caribbean/kn-saint-kitts-n-nevis' ],
182
+ ['st', 'caribbean/lc-saint-lucia' ],
183
+ ['vc', 'caribbean/vc-saint-vincent-n-the-grenadines' ],
184
+ ['td', 'caribbean/tt-trinidad-n-tobago' ],
185
+
186
+ ['bh', 'central-america/bz-belize' ],
187
+ ['cs', 'central-america/cr-costa-rica' ],
188
+ ['es', 'central-america/sv-el-salvador' ],
189
+ ['gt', 'central-america/gt-guatemala' ],
190
+ ['ho', 'central-america/hn-honduras' ],
191
+ ['nu', 'central-america/ni-nicaragua' ],
192
+ ['pm', 'central-america/pa-panama' ],
193
+
194
+ ['ar', 'south-america/ar-argentina' ],
195
+ ['bl', 'south-america/bo-bolivia' ],
196
+ ['br', 'south-america/br-brazil' ],
197
+ ['ci', 'south-america/cl-chile' ],
198
+ ['co', 'south-america/co-colombia' ],
199
+ ['ec', 'south-america/ec-ecuador' ],
200
+ ['gy', 'south-america/gy-guyana' ],
201
+ ['pa', 'south-america/py-paraguay' ],
202
+ ['pe', 'south-america/pe-peru' ],
203
+ ['ns', 'south-america/sr-suriname' ],
204
+ ['uy', 'south-america/uy-uruguay' ],
205
+ ['ve', 'south-america/ve-venezuela' ],
206
+
207
+ ['ag', 'africa/dz-algeria' ],
208
+ ['ao', 'africa/ao-angola' ],
209
+ ['bn', 'africa/bj-benin' ],
210
+ ['bc', 'africa/bw-botswana' ],
211
+ ['uv', 'africa/bf-burkina-faso' ],
212
+ ['by', 'africa/bi-burundi' ],
213
+ ['cm', 'africa/cm-cameroon' ],
214
+ ['cv', 'africa/cv-cape-verde' ],
215
+ ['ct', 'africa/cf-central-african-republic' ],
216
+ ['cd', 'africa/td-chad' ],
217
+ ['cn', 'africa/km-comoros' ],
218
+ ['cf', 'africa/cg-congo' ],
219
+ ['cg', 'africa/cd-congo-dr' ],
220
+ ['iv', 'africa/ci-cote-d-ivoire' ],
221
+ ['dj', 'africa/dj-djibouti' ],
222
+ ['eg', 'africa/eg-egypt' ],
223
+ ['ek', 'africa/gq-equatorial-guinea' ],
224
+ ['er', 'africa/er-eritrea' ],
225
+ ['et', 'africa/et-ethiopia' ],
226
+ ['gb', 'africa/ga-gabon' ],
227
+ ['ga', 'africa/gm-gambia' ],
228
+ ['gh', 'africa/gh-ghana' ],
229
+ ['gv', 'africa/gn-guinea' ],
230
+ ['pu', 'africa/gw-guinea-bissau' ],
231
+ ['ke', 'africa/ke-kenya' ],
232
+ ['lt', 'africa/ls-lesotho' ],
233
+ ['li', 'africa/lr-liberia' ],
234
+ ['ly', 'africa/ly-libya' ],
235
+ ['ma', 'africa/mg-madagascar' ],
236
+ ['mi', 'africa/mw-malawi' ],
237
+ ['ml', 'africa/ml-mali' ],
238
+ ['mr', 'africa/mr-mauritania' ],
239
+ ['mp', 'africa/mu-mauritius' ],
240
+ ['mo', 'africa/ma-morocco' ],
241
+ ['mz', 'africa/mz-mozambique' ],
242
+ ['wa', 'africa/na-namibia' ],
243
+ ['ng', 'africa/ne-niger' ],
244
+ ['ni', 'africa/ng-nigeria' ],
245
+ ['rw', 'africa/rw-rwanda' ],
246
+ ['tp', 'africa/st-st-sao-tome-n-principe' ],
247
+ ['sg', 'africa/sn-senegal' ],
248
+ ['se', 'africa/sc-seychelles' ],
249
+ ['sl', 'africa/sl-sierra-leone' ],
250
+ ['so', 'africa/so-somalia' ],
251
+ ['sf', 'africa/za-south-africa' ],
252
+ # ['od', 'africa/ss-south-sudan' ],
253
+ ['su', 'africa/sd-sudan' ],
254
+ ['wz', 'africa/sz-swaziland' ],
255
+ ['tz', 'africa/tz-tanzania' ],
256
+ ['to', 'africa/tg-togo' ],
257
+ ['ts', 'africa/tn-tunisia' ],
258
+ ['ug', 'africa/ug-uganda' ],
259
+ ['za', 'africa/zm-zambia' ],
260
+ ['zi', 'africa/zw-zimbabwe' ],
261
+
262
+ ['ba', 'middle-east/bh-bahrain' ],
263
+ ['ir', 'middle-east/ir-iran' ],
264
+ ['iz', 'middle-east/iq-iraq' ],
265
+ ['is', 'middle-east/il-israel' ],
266
+ ['jo', 'middle-east/jo-jordan' ],
267
+ ['ku', 'middle-east/kw-kuwait' ],
268
+ ['le', 'middle-east/lb-lebanon' ],
269
+ ['mu', 'middle-east/om-oman' ],
270
+ ### ['??', 'middle-east/ps-palestine' ], -- incl. gaza strip n west bank
271
+ ['qa', 'middle-east/qa-qatar' ],
272
+ ['sa', 'middle-east/sa-saudi-arabia' ],
273
+ ['sy', 'middle-east/sy-syria' ],
274
+ ['ae', 'middle-east/ae-united-arab-emirates' ],
275
+ ['ym', 'middle-east/ye-yemen' ],
276
+
277
+ ['af', 'asia/af-afghanistan' ],
278
+ ['bg', 'asia/bd-bangladesh' ],
279
+ ['bt', 'asia/bt-bhutan' ],
280
+ ['bx', 'asia/bn-brunei' ],
281
+ ['cb', 'asia/kh-cambodia' ],
282
+ ['ch', 'asia/cn-china' ],
283
+ ['in', 'asia/in-india' ],
284
+ ['id', 'asia/id-indonesia' ],
285
+ ['ja', 'asia/jp-japan' ],
286
+ ['kz', 'asia/kz-kazakhstan' ],
287
+ ['kg', 'asia/kg-kyrgyzstan' ],
288
+ ['la', 'asia/la-laos' ],
289
+ ['my', 'asia/my-malaysia' ],
290
+ ['mv', 'asia/mv-maldives' ],
291
+ ['mg', 'asia/mn-mongolia' ],
292
+ ['bm', 'asia/mm-myanmar' ], ## still using Burma
293
+ ['np', 'asia/np-nepal' ],
294
+ ['kn', 'asia/kp-north-korea' ],
295
+ ['pk', 'asia/pk-pakistan' ],
296
+ ['rp', 'asia/ph-philippines' ],
297
+ ['sn', 'asia/sg-singapore' ],
298
+ ['ks', 'asia/kr-south-korea' ],
299
+ ['ce', 'asia/lk-sri-lanka' ],
300
+ ['tw', 'asia/tw-taiwan' ],
301
+ ['ti', 'asia/tj-tajikistan' ],
302
+ ['th', 'asia/th-thailand' ],
303
+ ['tt', 'asia/tl-timor-leste' ],
304
+ ['tx', 'asia/tm-turkmenistan' ],
305
+ ['uz', 'asia/uz-uzbekistan' ],
306
+ ['vm', 'asia/vn-vietnam' ],
307
+
308
+ ['as', 'pacific/au-australia' ],
309
+ ['fj', 'pacific/fj-fiji' ],
310
+ ['kr', 'pacific/ki-kiribati' ],
311
+ # ['rm', 'pacific/mh-marshall-islands' ],
312
+ ['fm', 'pacific/fm-micronesia' ],
313
+ ['nr', 'pacific/nr-nauru' ],
314
+ ['nz', 'pacific/nz-new-zealand' ],
315
+ # ['ps', 'pacific/pw-palau' ],
316
+ ['pp', 'pacific/pg-papua-new-guinea' ],
317
+ ['ws', 'pacific/ws-samoa' ],
318
+ ['bp', 'pacific/sb-solomon-islands' ],
319
+ ['tn', 'pacific/to-tonga' ],
320
+ # ['tv', 'pacific/tv-tuvalu' ],
321
+ ['nh', 'pacific/vu-vanuatu' ],
322
+
323
+ =end
324
+
325
+
326
+
327
+ =begin
328
+ ['', 'africa/' ],
329
+ =end
46
330
  ]
47
331
 
48
332
  countries.each do |country|
@@ -51,29 +335,37 @@ task :genjson do
51
335
  end
52
336
 
53
337
 
54
- def gen_json_for( code )
55
- Dir.mkdir( 'tmp' ) unless Dir.exists?( 'tmp' )
56
- Dir.mkdir( 'tmp/html' ) unless Dir.exists?( 'tmp/html' )
57
- Dir.mkdir( 'tmp/json' ) unless Dir.exists?( 'tmp/json' )
338
+ def gen_json_for( country )
339
+
340
+ country_code = country[0]
341
+ country_path = country[1]
342
+
343
+ path_html = "tmp/html/#{country_path}.html"
344
+ path_json = "tmp/json/#{country_path}.json"
58
345
 
59
- page = Factbook::Page.new( code )
346
+ ## make sure paths exist
347
+ FileUtils.mkdir_p( File.dirname( path_html ) )
348
+ FileUtils.mkdir_p( File.dirname( path_json ) )
349
+
350
+
351
+ page = Factbook::Page.new( country_code )
60
352
 
61
353
  ## print first 600 chars
62
354
  pp page.html[0..600]
63
355
 
64
356
  ## save for debugging
65
-
66
- puts "saving a copy to #{code}.html for debugging"
67
- File.open( "tmp/html/#{code}.html", 'w') do |f|
357
+
358
+ puts "saving a copy to #{country_path}.html for debugging"
359
+ File.open( path_html, 'w') do |f|
68
360
  f.write( page.html )
69
361
  end
70
362
 
71
363
  h = page.data
72
- pp h
73
-
364
+ ## pp h
365
+
74
366
  ### save to json
75
- puts "saving a copy to #{code}.json for debugging"
76
- File.open( "tmp/json/#{code}.json", 'w') do |f|
367
+ puts "saving a copy to #{country_path}.json for debugging"
368
+ File.open( path_json, 'w') do |f|
77
369
  f.write( JSON.pretty_generate( h ) )
78
370
  end
79
371
  end