factbook-readers 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Manifest.txt +3 -25
- data/README.md +11 -69
- data/Rakefile +3 -3
- data/lib/factbook-readers.rb +5 -40
- data/lib/factbook-readers/convert.rb +37 -0
- data/lib/factbook-readers/counter.rb +7 -9
- data/lib/factbook-readers/page.rb +41 -61
- data/lib/factbook-readers/page_info.rb +15 -3
- data/lib/factbook-readers/version.rb +2 -2
- data/test/helper.rb +3 -0
- data/test/test_counter.rb +9 -6
- data/test/test_download.rb +27 -0
- data/test/test_fields.rb +44 -27
- data/test/test_json.rb +4 -4
- data/test/test_page.rb +8 -8
- data/test/test_version.rb +15 -0
- metadata +11 -48
- data/data/categories.csv +0 -164
- data/data/codes.csv +0 -262
- data/data/codesxref.csv +0 -280
- data/data/comparisons.csv +0 -75
- data/lib/factbook-readers/builder.rb +0 -187
- data/lib/factbook-readers/builder_item.rb +0 -201
- data/lib/factbook-readers/builder_json.rb +0 -68
- data/lib/factbook-readers/codes.rb +0 -121
- data/lib/factbook-readers/comparisons.rb +0 -49
- data/lib/factbook-readers/normalize.rb +0 -42
- data/lib/factbook-readers/reader_json.rb +0 -50
- data/lib/factbook-readers/sanitizer.rb +0 -351
- data/lib/factbook-readers/sect.rb +0 -28
- data/lib/factbook-readers/subsect.rb +0 -17
- data/lib/factbook-readers/table.rb +0 -51
- data/lib/factbook-readers/utils.rb +0 -47
- data/lib/factbook-readers/utils_info.rb +0 -128
- data/test/test_builder.rb +0 -30
- data/test/test_codes.rb +0 -72
- data/test/test_comparisons.rb +0 -16
- data/test/test_item_builder.rb +0 -97
- data/test/test_json_builder.rb +0 -23
- data/test/test_normalize.rb +0 -21
- data/test/test_sanitizer.rb +0 -36
- data/test/test_sanitizer_regex.rb +0 -87
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e5d0dc182771764b690661c69fdc905c4197b0a47e22f8a06903087719a3bb11
|
4
|
+
data.tar.gz: 7e995f86a4a1a9bb307914caff0ebb2ce39b0d2fafb62e8b1d4790608d7d6c5c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b640ed837c55588df3f27381d86aeec14677d54d47236cf486ced33952a2619341fb396d79e280a2aa93c01600151a6ea3b0baa6a6d66c7c5ad6115479a72bbd
|
7
|
+
data.tar.gz: 147439c2077725912bf0ede44bb30376112abc1ec7810f9f17d46643cbc055e46ff0e2051e07098a284ff4c1990edd0051246cbd04eebd05b0603a72f1f1d128
|
data/Manifest.txt
CHANGED
@@ -2,39 +2,17 @@ CHANGELOG.md
|
|
2
2
|
Manifest.txt
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
|
-
data/categories.csv
|
6
|
-
data/codes.csv
|
7
|
-
data/codesxref.csv
|
8
|
-
data/comparisons.csv
|
9
5
|
lib/factbook-readers.rb
|
10
|
-
lib/factbook-readers/
|
11
|
-
lib/factbook-readers/builder_item.rb
|
12
|
-
lib/factbook-readers/builder_json.rb
|
13
|
-
lib/factbook-readers/codes.rb
|
14
|
-
lib/factbook-readers/comparisons.rb
|
6
|
+
lib/factbook-readers/convert.rb
|
15
7
|
lib/factbook-readers/counter.rb
|
16
|
-
lib/factbook-readers/normalize.rb
|
17
8
|
lib/factbook-readers/page.rb
|
18
9
|
lib/factbook-readers/page_info.rb
|
19
|
-
lib/factbook-readers/reader_json.rb
|
20
|
-
lib/factbook-readers/sanitizer.rb
|
21
|
-
lib/factbook-readers/sect.rb
|
22
|
-
lib/factbook-readers/subsect.rb
|
23
|
-
lib/factbook-readers/table.rb
|
24
|
-
lib/factbook-readers/utils.rb
|
25
|
-
lib/factbook-readers/utils_info.rb
|
26
10
|
lib/factbook-readers/version.rb
|
27
11
|
lib/factbook/readers.rb
|
28
12
|
test/helper.rb
|
29
|
-
test/test_builder.rb
|
30
|
-
test/test_codes.rb
|
31
|
-
test/test_comparisons.rb
|
32
13
|
test/test_counter.rb
|
14
|
+
test/test_download.rb
|
33
15
|
test/test_fields.rb
|
34
|
-
test/test_item_builder.rb
|
35
16
|
test/test_json.rb
|
36
|
-
test/test_json_builder.rb
|
37
|
-
test/test_normalize.rb
|
38
17
|
test/test_page.rb
|
39
|
-
test/
|
40
|
-
test/test_sanitizer_regex.rb
|
18
|
+
test/test_version.rb
|
data/README.md
CHANGED
@@ -7,6 +7,11 @@
|
|
7
7
|
* forum :: [groups.google.com/group/openmundi](https://groups.google.com/group/openmundi)
|
8
8
|
|
9
9
|
|
10
|
+
## What's the World Factbook?
|
11
|
+
|
12
|
+
See [factbook/factbook.json »](https://github.com/factbook/factbook.json)
|
13
|
+
|
14
|
+
|
10
15
|
|
11
16
|
## Usage
|
12
17
|
|
@@ -14,7 +19,7 @@
|
|
14
19
|
|
15
20
|
```ruby
|
16
21
|
page = Factbook::Page.new( 'br' ) # br is the country code for Brazil
|
17
|
-
pp page.
|
22
|
+
pp page.to_h # pretty print data hash
|
18
23
|
```
|
19
24
|
|
20
25
|
resulting in:
|
@@ -94,78 +99,15 @@ end
|
|
94
99
|
```
|
95
100
|
|
96
101
|
|
97
|
-
### List all codes
|
98
|
-
|
99
|
-
```ruby
|
100
|
-
Factbook.codes.each do |code|
|
101
|
-
pp code
|
102
|
-
end
|
103
|
-
```
|
104
|
-
|
105
|
-
resulting in:
|
106
|
-
|
107
|
-
```
|
108
|
-
#<struct Factbook::Codes::Code
|
109
|
-
code ="af",
|
110
|
-
name ="Afghanistan",
|
111
|
-
category="Countries",
|
112
|
-
region ="South Asia">
|
113
|
-
#<struct Factbook::Codes::Code
|
114
|
-
code ="al",
|
115
|
-
name ="Albania",
|
116
|
-
category="Countries",
|
117
|
-
region ="Europe">
|
118
|
-
#<struct Factbook::Codes::Code
|
119
|
-
code ="ag",
|
120
|
-
name ="Algeria",
|
121
|
-
category="Countries",
|
122
|
-
region ="Africa">
|
123
|
-
#<struct Factbook::Codes::Code
|
124
|
-
code ="an",
|
125
|
-
name ="Andorra",
|
126
|
-
category="Countries",
|
127
|
-
region ="Europe">
|
128
|
-
...
|
129
|
-
```
|
130
|
-
|
131
|
-
Note: You can filter codes by category e.g. Countries, Dependencies, Miscellaneous, Oceans, etc.
|
132
|
-
and/or by region e.g. Africa, Europe, South Asia, Central America and Caribbean, etc.
|
133
102
|
|
134
103
|
|
135
|
-
```ruby
|
136
|
-
|
137
|
-
assert_equal 261, Factbook.codes.size
|
138
|
-
|
139
|
-
## categories
|
140
|
-
assert_equal 195, Factbook.codes.countries.size
|
141
|
-
assert_equal 52, Factbook.codes.dependencies.size
|
142
|
-
assert_equal 5, Factbook.codes.oceans.size
|
143
|
-
assert_equal 1, Factbook.codes.world.size
|
144
|
-
assert_equal 2, Factbook.codes.others.size
|
145
|
-
assert_equal 6, Factbook.codes.misc.size
|
146
|
-
|
147
|
-
## regions
|
148
|
-
assert_equal 55, Factbook.codes.europe.size
|
149
|
-
assert_equal 9, Factbook.codes.south_asia.size
|
150
|
-
assert_equal 6, Factbook.codes.central_asia.size
|
151
|
-
assert_equal 22, Factbook.codes.east_n_souteast_asia.size
|
152
|
-
assert_equal 19, Factbook.codes.middle_east.size
|
153
|
-
assert_equal 56, Factbook.codes.africa.size
|
154
|
-
assert_equal 7, Factbook.codes.north_america.size
|
155
|
-
assert_equal 33, Factbook.codes.central_america_n_caribbean.size
|
156
|
-
assert_equal 14, Factbook.codes.south_america.size
|
157
|
-
assert_equal 30, Factbook.codes.australia_oceania.size
|
158
|
-
assert_equal 4, Factbook.codes.antartica.size
|
159
|
-
assert_equal 5, Factbook.codes.region('Oceans').size
|
160
|
-
assert_equal 1, Factbook.codes.region('World').size
|
161
|
-
|
162
|
-
## categories + regions
|
163
|
-
assert_equal 45, Factbook.codes.countries.europe.size
|
164
|
-
...
|
165
|
-
```
|
166
104
|
|
167
|
-
|
105
|
+
## Ready-To-Use Public Domain (Free) Factbook Datasets
|
168
106
|
|
107
|
+
[factbook/factbook.json](https://github.com/factbook/factbook.json) - open (public domain)
|
108
|
+
factbook country profiles in JSON for all the world's countries (note: using the original
|
109
|
+
/ official two-letter GEC (formerly FIPS) codes and NOT the ISO codes you might be used to for country codes - e.g. Austria is `au.json` and NOT `at.json`,
|
110
|
+
Germany is `gm.json` and NOT `de.json`, and so on)
|
169
111
|
|
170
112
|
|
171
113
|
|
data/Rakefile
CHANGED
@@ -18,10 +18,10 @@ Hoe.spec 'factbook-readers' do
|
|
18
18
|
self.history_file = 'CHANGELOG.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
|
-
['
|
22
|
-
['
|
21
|
+
['factbook-codes' ],
|
22
|
+
['factbook-fields' ],
|
23
23
|
['webget'],
|
24
|
-
['nokogiri'],
|
24
|
+
## ['nokogiri'],
|
25
25
|
## ['activerecord'] # note: will include activesupport,etc.
|
26
26
|
]
|
27
27
|
|
data/lib/factbook-readers.rb
CHANGED
@@ -1,13 +1,8 @@
|
|
1
|
-
|
2
|
-
## require 'props'
|
1
|
+
require 'factbook-fields'
|
3
2
|
|
4
|
-
|
3
|
+
## more 3rd party gems/libs
|
4
|
+
## require 'props'
|
5
5
|
require 'webget'
|
6
|
-
require 'csvreader'
|
7
|
-
|
8
|
-
|
9
|
-
require 'nokogiri'
|
10
|
-
|
11
6
|
|
12
7
|
|
13
8
|
|
@@ -15,40 +10,10 @@ require 'nokogiri'
|
|
15
10
|
require 'factbook-readers/version' # let it always go first
|
16
11
|
|
17
12
|
|
18
|
-
require 'factbook-readers/codes'
|
19
|
-
require 'factbook-readers/comparisons'
|
20
|
-
|
21
|
-
|
22
|
-
## note: make codes, comparisons available
|
23
|
-
module Factbook
|
24
|
-
## note: load on demand only builtin codes, comparisons, etc.
|
25
|
-
## for now
|
26
|
-
def self.codes
|
27
|
-
@@codes ||= Codes.read_csv( "#{Factbook::Module::Readers.root}/data/codes.csv" );
|
28
|
-
end
|
29
|
-
def self.comparisons
|
30
|
-
@@comparisons ||= Comparisons.read_csv( "#{Factbook::Module::Readers.root}/data/comparisons.csv" )
|
31
|
-
end
|
32
|
-
end # module Factbook
|
33
13
|
|
34
|
-
|
35
|
-
|
36
|
-
require 'factbook-readers/utils'
|
37
|
-
require 'factbook-readers/utils_info'
|
38
|
-
require 'factbook-readers/sanitizer'
|
39
|
-
require 'factbook-readers/normalize'
|
40
|
-
require 'factbook-readers/builder_item'
|
41
|
-
require 'factbook-readers/builder'
|
42
|
-
require 'factbook-readers/builder_json'
|
43
|
-
require 'factbook-readers/page'
|
14
|
+
require 'factbook-readers/convert'
|
44
15
|
require 'factbook-readers/page_info'
|
45
|
-
require 'factbook-readers/
|
46
|
-
require 'factbook-readers/subsect'
|
47
|
-
|
48
|
-
|
49
|
-
require 'factbook-readers/reader_json'
|
50
|
-
|
51
|
-
require 'factbook-readers/table' ## e.g. TableReader
|
16
|
+
require 'factbook-readers/page'
|
52
17
|
|
53
18
|
require 'factbook-readers/counter'
|
54
19
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
def convert_cia( cia )
|
4
|
+
## convert from "raw" on-the-wire cia format to
|
5
|
+
## "standard" compact "classic" format
|
6
|
+
|
7
|
+
data = {}
|
8
|
+
|
9
|
+
cia['categories'].each do |cia_cat|
|
10
|
+
cat = data[ cia_cat['title'] ] = {}
|
11
|
+
cia_cat['fields'].each do |cia_field|
|
12
|
+
field = cat[ cia_field['name'] ] = {}
|
13
|
+
if cia_field['subfields']
|
14
|
+
cia_field['subfields'].each do |cia_subfield|
|
15
|
+
subfield = field[ cia_subfield['name'] ] = {}
|
16
|
+
subfield[ 'text' ] = cia_subfield['content']
|
17
|
+
end
|
18
|
+
|
19
|
+
puts "== #{cia_cat['title']} / #{cia_field['name']} - skipping field content (w/ subfields):"
|
20
|
+
puts " >#{cia_field['content']}<"
|
21
|
+
puts " ?? same as:"
|
22
|
+
cia_field['subfields'].each do |cia_subfield|
|
23
|
+
puts " #{cia_subfield['name']}: >#{cia_subfield['content']}<"
|
24
|
+
end
|
25
|
+
|
26
|
+
else
|
27
|
+
field[ 'text' ] = cia_field['content']
|
28
|
+
end
|
29
|
+
|
30
|
+
if cia_field[ 'field_note' ]
|
31
|
+
field[ 'note' ] = cia_field[ 'field_note' ]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
data
|
37
|
+
end
|
@@ -9,20 +9,20 @@ def initialize
|
|
9
9
|
@data = {}
|
10
10
|
end
|
11
11
|
|
12
|
-
def count( page )
|
12
|
+
def count( code, page )
|
13
13
|
|
14
14
|
## walk page data hash
|
15
15
|
# add nodes to data
|
16
16
|
|
17
|
-
walk(
|
17
|
+
walk( code, page.to_h, @data )
|
18
18
|
end
|
19
19
|
|
20
20
|
|
21
21
|
private
|
22
|
-
def walk(
|
22
|
+
def walk( code, hin, hout )
|
23
23
|
hin.each do |k,v|
|
24
|
-
if v.is_a? Hash
|
25
|
-
hout2 = hout[k]
|
24
|
+
if v.is_a?( Hash )
|
25
|
+
hout2 = hout[k] ||= { count: 0, codes: '' }
|
26
26
|
|
27
27
|
hout2[ :count ] += 1
|
28
28
|
|
@@ -32,12 +32,10 @@ def walk( page, hin, hout )
|
|
32
32
|
codes = hout2[ :codes ]
|
33
33
|
if codes ## note: might got deleted if passed treshhold (e.g. 9 entries)
|
34
34
|
codes << ' ' unless codes.empty? ## add separator (space for now)
|
35
|
-
codes <<
|
35
|
+
codes << code
|
36
36
|
hout2[ :codes ] = codes
|
37
37
|
end
|
38
|
-
|
39
|
-
hout[k] = hout2
|
40
|
-
walk( page, v, hout2 )
|
38
|
+
walk( code, v, hout2 )
|
41
39
|
end
|
42
40
|
end
|
43
41
|
end
|
@@ -5,23 +5,12 @@ module Factbook
|
|
5
5
|
class Page
|
6
6
|
include LogUtils::Logging
|
7
7
|
|
8
|
-
attr_reader :
|
9
|
-
attr_reader :info ## meta info e.g. country_code, country_name, region_name, last_updated, etc.
|
10
|
-
attr_reader :data ## "plain" access with vanilla hash
|
11
|
-
|
8
|
+
attr_reader :info ## meta info e.g. country_code, country_name, region_name, updated, etc.
|
12
9
|
|
13
10
|
## standard version (note: requires https)
|
14
|
-
SITE_BASE =
|
15
|
-
|
11
|
+
SITE_BASE = "https://www.cia.gov/the-world-factbook/geos/{code}.json"
|
16
12
|
|
17
|
-
def self.parse( html ) ## parse html from string
|
18
|
-
new( html: html )
|
19
|
-
end
|
20
13
|
|
21
|
-
def self.read( path )
|
22
|
-
html = File.open( path, 'r:utf-8' ) { |f| f.read }
|
23
|
-
new( html: html )
|
24
|
-
end
|
25
14
|
|
26
15
|
def self.parse_json( json ) ## parse json from string
|
27
16
|
new( json: json )
|
@@ -36,87 +25,78 @@ class Page
|
|
36
25
|
new( code, cache: cache )
|
37
26
|
end
|
38
27
|
|
39
|
-
## some convenience alias(es)
|
40
|
-
class << self
|
41
|
-
alias_method :read_html, :read
|
42
|
-
alias_method :parse_html, :parse
|
43
|
-
end
|
44
28
|
|
45
29
|
|
46
30
|
def initialize( code=nil,
|
47
31
|
json: nil,
|
48
|
-
html: nil,
|
49
32
|
cache: false,
|
50
33
|
info: nil )
|
51
34
|
if json
|
52
35
|
## note: assumes json is (still) a string/text
|
53
36
|
## (NOT yet parsed to structured data)
|
54
|
-
b =
|
55
|
-
else ## assume
|
56
|
-
if html
|
57
|
-
## for debugging and testing allow "custom" passed-in html page
|
58
|
-
else
|
37
|
+
b = ProfileBuilder.new( json )
|
38
|
+
else ## assume "raw" json dataset
|
59
39
|
## allow passing in code struct too - just use/pluck two-letter code from struct !!!
|
60
40
|
code = code.code if code.is_a?( Codes::Code )
|
61
41
|
|
62
42
|
raise ArgumentError, "two letter code (e.g. au) required to download page & build page url" if code.nil?
|
63
43
|
url = SITE_BASE.sub( '{code}', code )
|
64
44
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
45
|
+
raw_data = if cache && Webcache.exist?( url )
|
46
|
+
text = Webcache.read( url ) ## for debugging - read from cache
|
47
|
+
JSON.parse( text )
|
48
|
+
else
|
49
|
+
download_data( url )
|
50
|
+
end
|
51
|
+
|
52
|
+
## meta info from raw data - example:
|
53
|
+
## "name": "Aruba",
|
54
|
+
## "code": "AA",
|
55
|
+
## "region": "Central America",
|
56
|
+
## "published": "2021-01-25 09:07:08 -0500",
|
57
|
+
## "updated": "2021-01-22 14:38:14 -0500",
|
58
|
+
##
|
59
|
+
## note: published is NOT before updated (like an alias for created) BUT is often newer/later than updated - why!?
|
60
|
+
|
61
|
+
@info = PageInfo.new
|
62
|
+
|
63
|
+
@info.country_code = raw_data['code'].downcase
|
64
|
+
@info.country_name = raw_data['name']
|
65
|
+
@info.region_name = raw_data['region']
|
66
|
+
|
67
|
+
## note: just parse year,month,day for now (skip hours,minutes,etc.)
|
68
|
+
@info.published = Date.strptime( raw_data['published'], '%Y-%m-%d' )
|
69
|
+
@info.updated = Date.strptime( raw_data['updated'], '%Y-%m-%d' )
|
70
|
+
|
71
|
+
data = convert_cia( raw_data )
|
72
|
+
b = ProfileBuilder.new( data )
|
72
73
|
end
|
73
74
|
|
74
|
-
@
|
75
|
-
@info = b.info
|
75
|
+
@profile = b.profile
|
76
76
|
|
77
77
|
## todo/fix/quick hack:
|
78
78
|
## check for info opts - lets you overwrite page info
|
79
79
|
## -- use proper header to setup page info - why, why not??
|
80
80
|
@info = info if info
|
81
|
-
|
82
|
-
|
83
|
-
@data = {}
|
84
|
-
@sects.each do |sect|
|
85
|
-
@data[ sect.title ] = sect.data
|
86
|
-
end
|
87
81
|
end
|
88
82
|
|
89
83
|
|
90
|
-
def to_json( minify: false ) ## convenience helper for data.to_json; note: pretty print by default!
|
91
|
-
if minify
|
92
|
-
data.to_json
|
93
|
-
else ## note: pretty print by default!
|
94
|
-
JSON.pretty_generate( data )
|
95
|
-
end
|
96
|
-
end
|
97
84
|
|
98
|
-
|
99
|
-
def [](key)
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
# page.data['geo']
|
104
|
-
|
105
|
-
## fix: use delegate data, [] from forwardable lib - why?? why not??
|
106
|
-
|
107
|
-
data[key]
|
108
|
-
end
|
85
|
+
## convenience helpers - forward to profile
|
86
|
+
def [](key) @profile[key]; end
|
87
|
+
def to_h() @profile.to_h; end
|
88
|
+
def to_json( minify: false ) @profile.to_json( minify: minify ); end
|
89
|
+
def size() @profile.size; end
|
109
90
|
|
110
91
|
|
111
92
|
private
|
112
|
-
def
|
113
|
-
response = Webget.
|
93
|
+
def download_data( url )
|
94
|
+
response = Webget.call( url )
|
114
95
|
|
115
96
|
## note: exit on get / fetch error - do NOT continue for now - why? why not?
|
116
97
|
exit 1 if response.status.nok? ## e.g. HTTP status code != 200
|
117
98
|
|
118
|
-
|
119
|
-
response.text
|
99
|
+
response.json
|
120
100
|
end
|
121
101
|
end # class Page
|
122
102
|
end # module Factbook
|