factbook 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +17 -4
- data/Rakefile +48 -0
- data/lib/factbook/page.rb +10 -1
- data/lib/factbook/version.rb +1 -1
- metadata +11 -11
data/README.md
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
## What's the World Factbook?
|
12
12
|
|
13
|
-
The World Factbook published by the Central Intelligence Agency (CIA)
|
13
|
+
The World Factbook [1][2] published by the Central Intelligence Agency (CIA)
|
14
14
|
offers free country profiles in the public domain (that is, no copyright(s), no rights reserved).
|
15
15
|
|
16
16
|
- [1] [The World Factbook](https://www.cia.gov/library/publications/the-world-factbook/)
|
@@ -19,7 +19,7 @@ offers free country profiles in the public domain (that is, no copyright(s), no
|
|
19
19
|
|
20
20
|
## Usage
|
21
21
|
|
22
|
-
### Get page as a hash (that is, structured data e.g. nested key/values)
|
22
|
+
### Get country profile page as a hash (that is, structured data e.g. nested key/values)
|
23
23
|
|
24
24
|
page = Factbook::Page.new( 'br' )
|
25
25
|
pp page.data # pretty print hash
|
@@ -28,7 +28,7 @@ offers free country profiles in the public domain (that is, no copyright(s), no
|
|
28
28
|
|
29
29
|
page = Factbook::Page.new( 'br' )
|
30
30
|
File.open( 'br.json', 'w') do |f|
|
31
|
-
f.write
|
31
|
+
f.write page.to_json( pretty: true )
|
32
32
|
end
|
33
33
|
|
34
34
|
|
@@ -39,11 +39,24 @@ Just install the gem:
|
|
39
39
|
$ gem install factbook
|
40
40
|
|
41
41
|
|
42
|
+
## Ready-To-Use Public Domain Datasets (Generated by `factbook`)
|
42
43
|
|
43
|
-
|
44
|
+
[openmundi/factbook.json](https://github.com/openmundi/factbook.json) - open (public domain)
|
45
|
+
factbook country profiles in JSON for all the world's countries (using internet domain names
|
46
|
+
for country codes e.g. Austria is `at.json` not `au.json`, Germany is `de.json` not `gm.json` and so on)
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
## Alternative Libraries and Gems
|
51
|
+
|
52
|
+
Ruby
|
44
53
|
|
45
54
|
- [worldfactbook gem](https://github.com/sayem/worldfactbook) by sayem (aka Sayem Khan); fetches data from its own mirror, that is, rubyworldfactbook.com (last updated 2011?)
|
46
55
|
|
56
|
+
Others
|
57
|
+
|
58
|
+
TBD
|
59
|
+
|
47
60
|
|
48
61
|
## License
|
49
62
|
|
data/Rakefile
CHANGED
@@ -30,3 +30,51 @@ Hoe.spec 'factbook' do
|
|
30
30
|
}
|
31
31
|
|
32
32
|
end
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
desc 'generate json for factbook.json repo'
|
37
|
+
task :genjson do
|
38
|
+
require 'factbook'
|
39
|
+
|
40
|
+
countries = [
|
41
|
+
'au',
|
42
|
+
'be',
|
43
|
+
'br',
|
44
|
+
'mx',
|
45
|
+
'us'
|
46
|
+
]
|
47
|
+
|
48
|
+
countries.each do |country|
|
49
|
+
gen_json_for( country )
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
def gen_json_for( code )
|
55
|
+
Dir.mkdir( 'tmp' ) unless Dir.exists?( 'tmp' )
|
56
|
+
Dir.mkdir( 'tmp/html' ) unless Dir.exists?( 'tmp/html' )
|
57
|
+
Dir.mkdir( 'tmp/json' ) unless Dir.exists?( 'tmp/json' )
|
58
|
+
|
59
|
+
page = Factbook::Page.new( code )
|
60
|
+
|
61
|
+
## print first 600 chars
|
62
|
+
pp page.html[0..600]
|
63
|
+
|
64
|
+
## save for debugging
|
65
|
+
|
66
|
+
puts "saving a copy to #{code}.html for debugging"
|
67
|
+
File.open( "tmp/html/#{code}.html", 'w') do |f|
|
68
|
+
f.write( page.html )
|
69
|
+
end
|
70
|
+
|
71
|
+
h = page.data
|
72
|
+
pp h
|
73
|
+
|
74
|
+
### save to json
|
75
|
+
puts "saving a copy to #{code}.json for debugging"
|
76
|
+
File.open( "tmp/json/#{code}.json", 'w') do |f|
|
77
|
+
f.write( JSON.pretty_generate( h ) )
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
data/lib/factbook/page.rb
CHANGED
@@ -21,6 +21,15 @@ module Factbook
|
|
21
21
|
@doc ||= Nokogiri::HTML( html )
|
22
22
|
end
|
23
23
|
|
24
|
+
def to_json( opts={} )
|
25
|
+
## convenience helper for data.to_json
|
26
|
+
if opts[:pretty] || opts[:pp]
|
27
|
+
JSON.pretty_generate( data )
|
28
|
+
else
|
29
|
+
data.to_json
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
24
33
|
def data
|
25
34
|
if @data.nil?
|
26
35
|
titles = [
|
@@ -263,7 +272,7 @@ module Factbook
|
|
263
272
|
key = key.gsub( '(s)', 's' )
|
264
273
|
key = key.gsub( ':', '' ) # trailing :
|
265
274
|
## remove special chars ()-/,'
|
266
|
-
key = key.gsub( /[()\-\/,]/, ' ' )
|
275
|
+
key = key.gsub( /['()\-\/,]/, ' ' )
|
267
276
|
key = key.strip
|
268
277
|
key = key.gsub( /[ ]+/, '_' )
|
269
278
|
key
|
data/lib/factbook/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: factbook
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2014-07-12 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &73732120 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *73732120
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: fetcher
|
27
|
-
requirement: &
|
27
|
+
requirement: &73731730 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *73731730
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &73731340 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *73731340
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rdoc
|
49
|
-
requirement: &
|
49
|
+
requirement: &73730940 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '4.0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *73730940
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: hoe
|
60
|
-
requirement: &
|
60
|
+
requirement: &73730530 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '3.11'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *73730530
|
69
69
|
description: factbook - scripts for the world factbook (get open structured data e.g
|
70
70
|
JSON etc.)
|
71
71
|
email: openmundi@googlegroups.com
|