factbook 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +3 -0
- data/lib/factbook/sanitizer.rb +107 -24
- data/lib/factbook/utils_info.rb +37 -10
- data/lib/factbook/version.rb +3 -3
- data/test/data/src/au-2015-09-24.html +2006 -0
- data/test/data/src/au.html +658 -2006
- data/test/data/src/be-2015-09-24.html +2011 -0
- data/test/data/src/be.html +648 -2011
- data/test/test_attribs.rb +33 -28
- data/test/test_fields.rb +3 -3
- data/test/test_importer.rb +6 -5
- data/test/test_sanitizer.rb +5 -5
- data/test/test_sanitizer_regex.rb +64 -0
- metadata +8 -5
data/test/test_attribs.rb
CHANGED
@@ -12,71 +12,76 @@ class TestAttribs < MiniTest::Test
|
|
12
12
|
def read_test_page( code )
|
13
13
|
html = File.read( "#{Factbook.root}/test/data/src/#{code}.html" )
|
14
14
|
page = Factbook::Page.new( code, html: html )
|
15
|
-
page
|
15
|
+
page
|
16
16
|
end
|
17
17
|
|
18
18
|
def read_test_page_from_json( code )
|
19
19
|
json = File.read( "#{Factbook.root}/test/data/json/#{code}.json" )
|
20
20
|
page = Factbook::Page.new( code, json: json )
|
21
|
-
page
|
21
|
+
page
|
22
22
|
end
|
23
23
|
|
24
24
|
|
25
25
|
def test_au_from_html
|
26
26
|
page = read_test_page( 'au' ) # note: use builtin test page (do NOT fetch via internet)
|
27
|
-
|
27
|
+
|
28
28
|
assert_page_au( page )
|
29
29
|
end
|
30
30
|
|
31
31
|
def xxx_test_au_from_json
|
32
|
-
|
32
|
+
|
33
33
|
## todo/fix: check some issue with newlines? when comparing background or something ???
|
34
34
|
page = read_test_page_from_json( 'au' )
|
35
|
-
|
35
|
+
|
36
36
|
assert_page_au( page )
|
37
37
|
end
|
38
38
|
|
39
|
-
private
|
39
|
+
private
|
40
40
|
def assert_page_au( page )
|
41
41
|
########
|
42
42
|
## Introduction
|
43
|
-
assert_equal page.background, "Once the center of power for the large Austro-Hungarian Empire, Austria was reduced to a small republic after its defeat in World War I. Following annexation by Nazi Germany in 1938 and subsequent occupation by the victorious Allies in 1945, Austria's status remained unclear for a decade. A State Treaty signed in 1955 ended the occupation, recognized Austria's independence, and forbade unification with Germany. A constitutional law that same year declared the country's \"perpetual neutrality\" as a condition for Soviet military withdrawal. The Soviet Union's collapse in 1991 and Austria's entry into the
|
44
|
-
|
43
|
+
assert_equal page.background, "Once the center of power for the large Austro-Hungarian Empire, Austria was reduced to a small republic after its defeat in World War I. Following annexation by Nazi Germany in 1938 and subsequent occupation by the victorious Allies in 1945, Austria's status remained unclear for a decade. A State Treaty signed in 1955 ended the occupation, recognized Austria's independence, and forbade unification with Germany. A constitutional law that same year declared the country's \"perpetual neutrality\" as a condition for Soviet military withdrawal. The Soviet Union's collapse in 1991 and Austria's entry into the EU in 1995 have altered the meaning of this neutrality. A prosperous, democratic country, Austria entered the EU Economic and Monetary Union in 1999."
|
44
|
+
|
45
45
|
###########
|
46
46
|
## Geography
|
47
|
-
assert_equal page.area, "83,871 sq km"
|
47
|
+
assert_equal page.area, "83,871 sq km"
|
48
48
|
assert_equal page.area_land, "82,445 sq km"
|
49
49
|
assert_equal page.area_water, "1,426 sq km"
|
50
50
|
assert_equal page.area_note, nil
|
51
51
|
assert_equal page.area_comparative, "about the size of South Carolina; slightly more than two-thirds the size of Pennsylvania"
|
52
|
-
assert_equal page.climate, "temperate; continental, cloudy; cold winters with frequent rain and some snow in lowlands and snow in mountains; moderate summers with occasional showers"
|
53
|
-
assert_equal page.terrain, "mostly mountains (Alps) in the west and south; mostly flat or gently sloping along the eastern and northern margins"
|
54
|
-
|
55
|
-
|
56
|
-
assert_equal page.
|
52
|
+
assert_equal page.climate, "temperate; continental, cloudy; cold winters with frequent rain and some snow in lowlands and snow in mountains; moderate summers with occasional showers"
|
53
|
+
assert_equal page.terrain, "mostly mountains (Alps) in the west and south; mostly flat or gently sloping along the eastern and northern margins"
|
54
|
+
|
55
|
+
# was:
|
56
|
+
## assert_equal page.elevation_lowest, "Neusiedler See 115 m"
|
57
|
+
## assert_equal page.elevation_highest, "Grossglockner 3,798 m"
|
58
|
+
## new ????
|
59
|
+
## assert_equal page.elevation_extremes, "lowest point: Neusiedler See 115 m ++ highest point: Grossglockner 3,798 m"
|
60
|
+
|
61
|
+
assert_equal page.resources, "oil, coal, lignite, timber, iron ore, copper, zinc, antimony, magnesite, tungsten, graphite, salt, hydropower"
|
57
62
|
|
58
63
|
###################
|
59
64
|
## People and Society
|
60
65
|
assert_equal page.languages, "German (official nationwide) 88.6%, Turkish 2.3%, Serbian 2.2%, Croatian (official in Burgenland) 1.6%, other (includes Slovene, official in South Carinthia, and Hungarian, official in Burgenland) 5.3% (2001 est.)"
|
61
|
-
assert_equal page.religions, "Catholic 73.8% (includes Roman Catholic 73.6%, other Catholic .2%), Protestant 4.9%, Muslim 4.2%, Orthodox 2.2%, other 0.8% (includes other Christian), none 12%, unspecified 2% (2001 est.)"
|
62
|
-
assert_equal page.population, "8,
|
63
|
-
assert_equal page.population_growth, "0.
|
64
|
-
assert_equal page.birth_rate, "9.
|
65
|
-
assert_equal page.death_rate, "9.
|
66
|
-
assert_equal page.migration_rate, "5.
|
66
|
+
assert_equal page.religions, "Catholic 73.8% (includes Roman Catholic 73.6%, other Catholic 0.2%), Protestant 4.9%, Muslim 4.2%, Orthodox 2.2%, other 0.8% (includes other Christian), none 12%, unspecified 2% (2001 est.)"
|
67
|
+
assert_equal page.population, "8,711,770 (July 2016 est.)"
|
68
|
+
assert_equal page.population_growth, "0.51% (2016 est.)"
|
69
|
+
assert_equal page.birth_rate, "9.5 births/1,000 population (2016 est.)"
|
70
|
+
assert_equal page.death_rate, "9.5 deaths/1,000 population (2016 est.)"
|
71
|
+
assert_equal page.migration_rate, "5.2 migrant(s)/1,000 population (2016 est.)"
|
67
72
|
assert_equal page.major_cities, "VIENNA (capital) 1.753 million (2015)"
|
68
73
|
|
69
74
|
|
70
75
|
####################
|
71
76
|
## Economy
|
72
|
-
assert_equal page.economy_overview, "Austria, with its well-developed market economy, skilled labor force, and high standard of living, is closely tied to other EU economies, especially Germany's. Its economy features a large service sector, a relatively sound industrial sector, and a small, but highly developed agricultural sector. Economic growth was anemic at less than 0.5% in 2013 and 2014, and growth in 2015 is not expected to exceed 0.5%. Austria’s 5.6% unemployment rate, while low by European standards, is at an historic high for Austria. Without extensive vocational training programs and generous early retirement, the unemployment rate would be even higher. Public finances have not stabilized even after a 2012 austerity package of expenditure cuts and new revenues. On the contrary, in 2014, the government created a “bad bank” for the troubled nationalized “Hypo Alpe Adria” bank, pushing the budget deficit up by 0.9% of GDP to 2.4% and public debt to 84.5% of the GDP. Although Austria's fiscal position compares favorably with other euro-zone countries, it faces several external risks, such as Austrian banks' continued exposure to Central and Eastern Europe, repercussions from the Hypo Alpe Adria bank collapse, political and economic uncertainties caused by the European sovereign debt crisis, the current crisis in Russia/Ukraine, the recent appreciation of the Swiss Franc, and political developments in Hungary."
|
73
|
-
assert_equal page.gdp_ppp, "$
|
74
|
-
assert_equal page.gdp_ppp_note, "data are in
|
75
|
-
assert_equal page.gdp, "$
|
76
|
-
assert_equal page.gdp_growth, "0.
|
77
|
-
assert_equal page.gdp_ppp_capita, "$
|
78
|
-
assert_equal page.gdp_ppp_capita_note, "data are in
|
79
|
-
assert_equal page.saving, "25% of GDP (
|
77
|
+
## assert_equal page.economy_overview, "Austria, with its well-developed market economy, skilled labor force, and high standard of living, is closely tied to other EU economies, especially Germany's. Its economy features a large service sector, a relatively sound industrial sector, and a small, but highly developed agricultural sector. Economic growth was anemic at less than 0.5% in 2013 and 2014, and growth in 2015 is not expected to exceed 0.5%. Austria’s 5.6% unemployment rate, while low by European standards, is at an historic high for Austria. Without extensive vocational training programs and generous early retirement, the unemployment rate would be even higher. Public finances have not stabilized even after a 2012 austerity package of expenditure cuts and new revenues. On the contrary, in 2014, the government created a “bad bank” for the troubled nationalized “Hypo Alpe Adria” bank, pushing the budget deficit up by 0.9% of GDP to 2.4% and public debt to 84.5% of the GDP. Although Austria's fiscal position compares favorably with other euro-zone countries, it faces several external risks, such as Austrian banks' continued exposure to Central and Eastern Europe, repercussions from the Hypo Alpe Adria bank collapse, political and economic uncertainties caused by the European sovereign debt crisis, the current crisis in Russia/Ukraine, the recent appreciation of the Swiss Franc, and political developments in Hungary."
|
78
|
+
assert_equal page.gdp_ppp, "$405.1 billion (2015 est.) ++ $401.6 billion (2014 est.) ++ $400.2 billion (2013 est.)"
|
79
|
+
assert_equal page.gdp_ppp_note, "data are in 2015 US dollars"
|
80
|
+
assert_equal page.gdp, "$374.3 billion (2015 est.)"
|
81
|
+
assert_equal page.gdp_growth, "0.9% (2015 est.) ++ 0.4% (2014 est.) ++ 0.3% (2013 est.)"
|
82
|
+
assert_equal page.gdp_ppp_capita, "$47,000 (2015 est.) ++ $47,000 (2014 est.) ++ $47,200 (2013 est.)"
|
83
|
+
assert_equal page.gdp_ppp_capita_note, "data are in 2015 US dollars"
|
84
|
+
assert_equal page.saving, "25% of GDP (2015 est.) ++ 24.7% of GDP (2014 est.) ++ 25.2% of GDP (2013 est.)"
|
80
85
|
end
|
81
86
|
|
82
87
|
end # class TestAttribs
|
data/test/test_fields.rb
CHANGED
@@ -12,15 +12,15 @@ class TestFields < MiniTest::Test
|
|
12
12
|
def read_test_page( code )
|
13
13
|
html = File.read( "#{Factbook.root}/test/data/src/#{code}.html" )
|
14
14
|
page = Factbook::Page.new( code, html: html )
|
15
|
-
page
|
15
|
+
page
|
16
16
|
end
|
17
17
|
|
18
18
|
def test_fields_full
|
19
19
|
## Factbook::Page.new( 'au', fields: 'full' )
|
20
20
|
page = read_test_page( 'au' ) # use builtin test page (do NOT fetch via internet)
|
21
21
|
|
22
|
-
assert_equal '-
|
23
|
-
assert_equal '
|
22
|
+
assert_equal '-1.1% of GDP (2015 est.)', page['Economy']['Budget surplus (+) or deficit (-)']['text']
|
23
|
+
assert_equal '0.7%', page['Economy']['Labor force - by occupation']['agriculture']['text']
|
24
24
|
|
25
25
|
assert_equal 'Enns, Krems, Linz, Vienna (Danube)', page['Transportation']['Ports and terminals']['river port(s)']['text']
|
26
26
|
end
|
data/test/test_importer.rb
CHANGED
@@ -24,22 +24,23 @@ class TestImporter < MiniTest::Test
|
|
24
24
|
def read_test_page( code )
|
25
25
|
html = File.read( "#{Factbook.root}/test/data/src/#{code}.html" )
|
26
26
|
page = Factbook::Page.new( code, html: html )
|
27
|
-
page
|
27
|
+
page
|
28
28
|
end
|
29
29
|
|
30
|
-
|
30
|
+
|
31
|
+
def to_be_done_test_au_fix_me
|
31
32
|
page = read_test_page( 'au' ) # use builtin test page (do NOT fetch via internet)
|
32
33
|
|
33
34
|
setup_in_memory_db()
|
34
|
-
|
35
|
+
|
35
36
|
im = Factbook::Importer.new
|
36
37
|
im.import( page )
|
37
|
-
|
38
|
+
|
38
39
|
rec = Factbook::Fact.find_by! code: 'au'
|
39
40
|
|
40
41
|
###########
|
41
42
|
## Geography
|
42
|
-
assert_equal 83_871, rec.area
|
43
|
+
assert_equal 83_871, rec.area
|
43
44
|
assert_equal 82_445, rec.area_land
|
44
45
|
assert_equal 1_426, rec.area_water
|
45
46
|
|
data/test/test_sanitizer.rb
CHANGED
@@ -11,14 +11,15 @@ require 'helper'
|
|
11
11
|
class TestSanitizer < MiniTest::Test
|
12
12
|
|
13
13
|
def test_sanitize
|
14
|
-
|
15
|
-
['au','be'].each do |
|
14
|
+
|
15
|
+
['au','be'].each do |cnty|
|
16
|
+
|
16
17
|
## use/fix: ASCII-8BIT (e.g.keep as is) -???
|
17
|
-
html_ascii = File.read( "#{Factbook.root}/test/data/src/#{
|
18
|
+
html_ascii = File.read( "#{Factbook.root}/test/data/src/#{cnty}.html" ) ## fix/todo: use ASCII8BIT/binary reader ??
|
18
19
|
|
19
20
|
html, info, errors = Factbook::Sanitizer.new.sanitize( html_ascii )
|
20
21
|
|
21
|
-
File.open( "./tmp/#{
|
22
|
+
File.open( "./tmp/#{cnty}.profile.html", 'w' ) do |f|
|
22
23
|
f.write "** info:\n"
|
23
24
|
f.write info.inspect + "\n\n"
|
24
25
|
f.write "** errors:\n"
|
@@ -32,4 +33,3 @@ class TestSanitizer < MiniTest::Test
|
|
32
33
|
end
|
33
34
|
|
34
35
|
end # class TestSanitizer
|
35
|
-
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_sanitizer_regex.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestSanitizerRegex < MiniTest::Test
|
12
|
+
|
13
|
+
def test_area_map
|
14
|
+
|
15
|
+
html =<<HTML
|
16
|
+
<div class='disTable areaComp'>
|
17
|
+
<span class='category tCell' style='margin-bottom:0px; vertical-align:bottom;'>Area comparison map:</span>
|
18
|
+
<span class="tCell"><a data-toggle="modal" href="#areaCompModal"><img src="../graphics/areacomparison_icon.jpg" border="0" style="cursor:pointer; border: 0px solid #CCC;"></a></span></div>
|
19
|
+
|
20
|
+
<div class="modal fade" id="areaCompModal" role="dialog">
|
21
|
+
<div class="wfb-modal-dialog">
|
22
|
+
<div class="modal-content" >
|
23
|
+
<div class="wfb-modal-header" style="border-radius: 4px; font-family: Verdana,Arial,sans-serif; font-size: 14px !important; font-weight: bold; padding: 0.4em 16px 0.4em 1em; background: #cccccc url("..images/ui-bg_highlight-soft_75_cccccc_1x100.png") repeat-x scroll 50% 50%;" >
|
24
|
+
<span style="font-size: 14px !important; margin: 0.1em 16px 0.1em 0;" class="modal-title wfb-title">The World Factbook</span><span style="float: right; margin-top: -4px;">
|
25
|
+
<button type="button" class="close" title="close" data-dismiss="modal">×</button></span>
|
26
|
+
</div>
|
27
|
+
<div class="wfb-modal-body">
|
28
|
+
...
|
29
|
+
<div id='field'
|
30
|
+
HTML
|
31
|
+
|
32
|
+
m = Factbook::Sanitizer::AREA_COMP_CATEGORY_REGEX.match( html )
|
33
|
+
pp m
|
34
|
+
|
35
|
+
assert m.nil? == false
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
def test_pop_pyramid
|
40
|
+
|
41
|
+
html =<<HTML
|
42
|
+
<div class='disTable popPyramid'>
|
43
|
+
<span class='category tCell' style='margin-bottom:0px; vertical-align:bottom;'>population pyramid:</span>
|
44
|
+
<span class="tCell"><a data-toggle="modal" href="#popPyramidModal"><img title="" src="../graphics/poppyramid_icon.jpg" style="cursor:pointer; border: 0px solid #CCC;"></span></a></div>
|
45
|
+
|
46
|
+
<div class="modal fade" id="popPyramidModal" role="dialog">
|
47
|
+
<div class="wfb-modal-dialog">
|
48
|
+
<div class="modal-content" >
|
49
|
+
<div class="wfb-modal-header" style="border-radius: 4px; font-family: Verdana,Arial,sans-serif; font-size: 14px !important; font-weight: bold; padding: 0.4em 16px 0.4em 1em; background: #cccccc url("..images/ui-bg_highlight-soft_75_cccccc_1x100.png") repeat-x scroll 50% 50%;" >
|
50
|
+
<span style="font-size: 14px !important; margin: 0.1em 16px 0.1em 0;" class="modal-title wfb-title">The World Factbook</span><span style="float: right; margin-top: -4px;">
|
51
|
+
<button type="button" class="close" title="close" data-dismiss="modal">×</button></span>
|
52
|
+
</div>
|
53
|
+
<div class="wfb-modal-body">
|
54
|
+
...
|
55
|
+
<div id='field'
|
56
|
+
HTML
|
57
|
+
|
58
|
+
m = Factbook::Sanitizer::POP_PYRAMID_CATEGORY_REGEX.match( html )
|
59
|
+
pp m
|
60
|
+
|
61
|
+
assert m.nil? == false
|
62
|
+
end
|
63
|
+
|
64
|
+
end # class TestSanitizerRegex
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: factbook
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: logutils
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '3.
|
89
|
+
version: '3.15'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '3.
|
96
|
+
version: '3.15'
|
97
97
|
description: factbook - scripts for the world factbook (get open structured data e.g
|
98
98
|
JSON etc.)
|
99
99
|
email: openmundi@googlegroups.com
|
@@ -148,7 +148,9 @@ files:
|
|
148
148
|
- test/data/be.html
|
149
149
|
- test/data/be.yml
|
150
150
|
- test/data/json/au.json
|
151
|
+
- test/data/src/au-2015-09-24.html
|
151
152
|
- test/data/src/au.html
|
153
|
+
- test/data/src/be-2015-09-24.html
|
152
154
|
- test/data/src/be.html
|
153
155
|
- test/helper.rb
|
154
156
|
- test/test_attribs.rb
|
@@ -166,6 +168,7 @@ files:
|
|
166
168
|
- test/test_normalize.rb
|
167
169
|
- test/test_page.rb
|
168
170
|
- test/test_sanitizer.rb
|
171
|
+
- test/test_sanitizer_regex.rb
|
169
172
|
homepage: https://github.com/worlddb/factbook
|
170
173
|
licenses:
|
171
174
|
- Public Domain
|
@@ -188,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
188
191
|
version: '0'
|
189
192
|
requirements: []
|
190
193
|
rubyforge_project:
|
191
|
-
rubygems_version: 2.
|
194
|
+
rubygems_version: 2.6.7
|
192
195
|
signing_key:
|
193
196
|
specification_version: 4
|
194
197
|
summary: factbook - scripts for the world factbook (get open structured data e.g JSON
|