factbook-readers 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Manifest.txt +56 -0
  4. data/README.md +196 -0
  5. data/Rakefile +34 -0
  6. data/data/attributes.yml +337 -0
  7. data/data/categories.csv +164 -0
  8. data/data/codes.csv +262 -0
  9. data/data/codesxref.csv +280 -0
  10. data/data/comparisons.csv +75 -0
  11. data/lib/factbook-readers.rb +59 -0
  12. data/lib/factbook-readers/attributes.rb +74 -0
  13. data/lib/factbook-readers/builder.rb +212 -0
  14. data/lib/factbook-readers/builder_item.rb +185 -0
  15. data/lib/factbook-readers/builder_json.rb +79 -0
  16. data/lib/factbook-readers/codes.rb +122 -0
  17. data/lib/factbook-readers/comparisons.rb +50 -0
  18. data/lib/factbook-readers/counter.rb +48 -0
  19. data/lib/factbook-readers/normalize.rb +43 -0
  20. data/lib/factbook-readers/page.rb +148 -0
  21. data/lib/factbook-readers/page_info.rb +12 -0
  22. data/lib/factbook-readers/reader_json.rb +51 -0
  23. data/lib/factbook-readers/sanitizer.rb +307 -0
  24. data/lib/factbook-readers/sect.rb +29 -0
  25. data/lib/factbook-readers/subsect.rb +18 -0
  26. data/lib/factbook-readers/table.rb +52 -0
  27. data/lib/factbook-readers/utils.rb +47 -0
  28. data/lib/factbook-readers/utils_info.rb +129 -0
  29. data/lib/factbook-readers/version.rb +24 -0
  30. data/lib/factbook/readers.rb +5 -0
  31. data/test/data/au.html +579 -0
  32. data/test/data/au.yml +8 -0
  33. data/test/data/be.html +596 -0
  34. data/test/data/be.yml +8 -0
  35. data/test/data/json/au.json +892 -0
  36. data/test/data/src/ag.html +716 -0
  37. data/test/data/src/au-2015-09-24.html +2006 -0
  38. data/test/data/src/au.html +658 -0
  39. data/test/data/src/be-2015-09-24.html +2011 -0
  40. data/test/data/src/be.html +648 -0
  41. data/test/helper.rb +11 -0
  42. data/test/test_attribs.rb +87 -0
  43. data/test/test_attribs_def.rb +20 -0
  44. data/test/test_builder.rb +35 -0
  45. data/test/test_codes.rb +76 -0
  46. data/test/test_comparisons.rb +19 -0
  47. data/test/test_convert.rb +30 -0
  48. data/test/test_counter.rb +31 -0
  49. data/test/test_fields.rb +52 -0
  50. data/test/test_importer.rb +56 -0
  51. data/test/test_item_builder.rb +99 -0
  52. data/test/test_json.rb +45 -0
  53. data/test/test_json_builder.rb +25 -0
  54. data/test/test_normalize.rb +23 -0
  55. data/test/test_page.rb +38 -0
  56. data/test/test_sanitizer.rb +39 -0
  57. data/test/test_sanitizer_regex.rb +89 -0
  58. metadata +196 -0
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_json.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
# Parses the bundled country profile pages and saves the resulting data
# as pretty-printed JSON into ./tmp (handy for debugging by hand).
#
# note: uses Minitest (not the legacy MiniTest alias, which newer
#       minitest releases no longer define by default)
class TestJson < Minitest::Test

  def test_json
    ## note: Dir.exists? got removed in ruby 3.2 - Dir.exist? works everywhere
    Dir.mkdir( 'tmp' ) unless Dir.exist?( 'tmp' )

    codes = [ 'au',
              'ag',
              'be',
              #'br',
              #'mx',
              #'ls',
              #'vt',
              #'ee',
              #'xx'
            ]

    codes.each do |code|

      html = File.read( "#{Factbook.root}/test/data/src/#{code}.html" )
      page = Factbook::Page.new( code, html: html )

      h = page.data
      pp h

      ### save to json
      path = "tmp/#{code}.json"
      puts "saving a copy to #{code}.json for debugging"
      File.open( path, 'w' ) do |f|
        f.write JSON.pretty_generate( h )
        ## f.write page.to_json
      end

      ## make sure the debug copy really got written
      assert File.exist?( path ), "expected #{path} to get written"
    end
  end

end # class TestJson
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_json_builder.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
# Loads the bundled au.json fixture via JsonBuilder.from_file and
# checks the section / subsection counts plus one known text value.
#
# note: uses Minitest (not the legacy MiniTest alias, which newer
#       minitest releases no longer define by default)
class TestJsonBuilder < Minitest::Test

  def test_read
    code = 'au'
    b = Factbook::JsonBuilder.from_file( "#{Factbook.root}/test/data/json/#{code}.json" )

    assert_equal 10, b.sects.size
    assert_equal 1,  b.sects[0].subsects.size     ## e.g. Introduction/Background
    assert_equal 'Central Europe, north of Italy and Slovenia', b.json['Geography']['Location']['text']
  end

end # class TestJsonBuilder
25
+
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_normalize.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
# Checks that normalize_category (mixed in from Factbook::NormalizeHelper)
# reduces every listed variant to the plain category name.
#
# note: uses Minitest (not the legacy MiniTest alias, which newer
#       minitest releases no longer define by default)
class TestNormalizer < Minitest::Test

  include Factbook::NormalizeHelper

  def test_normalize
    ## all variants must normalize to the same category name
    [ 'border countries:',
      'border countries: ',
      'border countries (8):',
      'border countries (10): ',
    ].each do |raw|
      assert_equal 'border countries', normalize_category( raw ), "input: #{raw.inspect}"
    end
  end

end # class TestNormalizer
23
+
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_page.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
# Parses the bundled country profile pages and checks the number of
# sections found per page.
#
# note: uses Minitest (not the legacy MiniTest alias, which newer
#       minitest releases no longer define by default)
class TestPage < Minitest::Test

  def test_sects
    ## records of [country code, expected number of sections]
    pages = [
      [ 'au', 10 ],
      [ 'be', 10 ],
      # [ 'br', 10 ],
      # [ 'ee', 10 ],
      # [ 'mx', 10 ],
      # [ 'xx', 10 ],
      # [ 'ls',  9 ],
      # [ 'vt',  8 ],
    ]

    pages.each do |code, sects_size|
      html = File.read( "#{Factbook.root}/test/data/src/#{code}.html" )
      page = Factbook::Page.new( code, html: html )

      assert_equal sects_size, page.sects.size, "sects size for #{code}"
    end
  end

end # class TestPage
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_sanitizer.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
# Runs the sanitizer over the bundled country profile pages and saves
# the returned html, info and errors into ./tmp/<code>.profile.html
# for inspection by hand.
#
# note: uses Minitest (not the legacy MiniTest alias, which newer
#       minitest releases no longer define by default)
class TestSanitizer < Minitest::Test

  def test_sanitize
    ## make sure the output directory exists; otherwise File.open below
    ##  fails with Errno::ENOENT if this test runs on its own
    Dir.mkdir( './tmp' ) unless Dir.exist?( './tmp' )

    ## austria (au)
    ## algeria (ag)
    ## belgium (be)
    ## ['au'].each do |cnty|
    ['au','ag','be'].each do |cnty|

      ## use/fix: ASCII-8BIT (e.g.keep as is) -???
      html_ascii = File.read( "#{Factbook.root}/test/data/src/#{cnty}.html" )    ## fix/todo: use ASCII8BIT/binary reader ??

      html, info, errors = Factbook::Sanitizer.new.sanitize( html_ascii )

      File.open( "./tmp/#{cnty}.profile.html", 'w' ) do |f|
        f.write "** info:\n"
        f.write info.inspect + "\n\n"
        f.write "** errors:\n"
        f.write errors.inspect + "\n\n"
        f.write "** html:\n"
        f.write html
      end
    end

    assert true    ## assume everything ok
  end

end # class TestSanitizer
@@ -0,0 +1,89 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_sanitizer_regex.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
# Checks that the category regex constants defined on Factbook::Sanitizer
# match sample markup (area comparison map, population pyramid,
# religious affiliation).
#
# note: uses Minitest (not the legacy MiniTest alias, which newer
#       minitest releases no longer define by default)
# note: the heredoc bodies and terminators start in the first column
#       on purpose - the plain <<HTML form keeps text as is
class TestSanitizerRegex < Minitest::Test

  def test_area_map

    html =<<HTML
<div class='disTable areaComp'>
<span class='category tCell' style='margin-bottom:0px; vertical-align:bottom;'>Area comparison map:</span>
<span class="tCell"><a data-toggle="modal" href="#areaCompModal"><img src="../graphics/areacomparison_icon.jpg" border="0" style="cursor:pointer; border: 0px solid #CCC;"></a></span></div>

<div class="modal fade" id="areaCompModal" role="dialog">
<div class="wfb-modal-dialog">
<div class="modal-content" >
<div class="wfb-modal-header" style="border-radius: 4px; font-family: Verdana,Arial,sans-serif; font-size: 14px !important; font-weight: bold; padding: 0.4em 16px 0.4em 1em; background: #cccccc url("..images/ui-bg_highlight-soft_75_cccccc_1x100.png") repeat-x scroll 50% 50%;" >
<span style="font-size: 14px !important; margin: 0.1em 16px 0.1em 0;" class="modal-title wfb-title">The World Factbook</span><span style="float: right; margin-top: -4px;">
<button type="button" class="close" title="close" data-dismiss="modal">&times;</button></span>
</div>
<div class="wfb-modal-body">
...
<div id='field'
HTML

    m = Factbook::Sanitizer::AREA_COMP_CATEGORY_REGEX.match( html )
    pp m

    refute_nil m, 'AREA_COMP_CATEGORY_REGEX expected to match'
  end


  def test_pop_pyramid

    html =<<HTML
<div class='disTable popPyramid'>
<span class='category tCell' style='margin-bottom:0px; vertical-align:bottom;'>population pyramid:</span>
<span class="tCell"><a data-toggle="modal" href="#popPyramidModal"><img title="" src="../graphics/poppyramid_icon.jpg" style="cursor:pointer; border: 0px solid #CCC;"></span></a></div>

<div class="modal fade" id="popPyramidModal" role="dialog">
<div class="wfb-modal-dialog">
<div class="modal-content" >
<div class="wfb-modal-header" style="border-radius: 4px; font-family: Verdana,Arial,sans-serif; font-size: 14px !important; font-weight: bold; padding: 0.4em 16px 0.4em 1em; background: #cccccc url("..images/ui-bg_highlight-soft_75_cccccc_1x100.png") repeat-x scroll 50% 50%;" >
<span style="font-size: 14px !important; margin: 0.1em 16px 0.1em 0;" class="modal-title wfb-title">The World Factbook</span><span style="float: right; margin-top: -4px;">
<button type="button" class="close" title="close" data-dismiss="modal">&times;</button></span>
</div>
<div class="wfb-modal-body">
...
<div id='field'
HTML

    m = Factbook::Sanitizer::POP_PYRAMID_CATEGORY_REGEX.match( html )
    pp m

    refute_nil m, 'POP_PYRAMID_CATEGORY_REGEX expected to match'
  end # method test_pop_pyramid


  def test_rel_affiliation

    html =<<HTML
<div class='disTable relAffiliation'><span class='category tCell' style='margin-bottom:0px; vertical-align:bottom;'>religious affiliation:</span>
<span class="tCell"><a data-toggle="modal" href="#relAffiliationModal"><img title="" src="../graphics/middle-east-religion-icon.jpg" style="cursor:pointer; border: 0px solid #CCC;"></span></a></div>

<div class="modal fade" id="relAffiliationModal" role="dialog">
<div class="wfb-modal-dialog">
<div class="modal-content" >
<div class="wfb-modal-header" style="border-radius: 4px; font-family: Verdana,Arial,sans-serif; font-size: 14px !important; font-weight: bold; padding: 0.4em 16px 0.4em 1em; background: #cccccc url("..images/ui-bg_highlight-soft_75_cccccc_1x100.png") repeat-x scroll 50% 50%;" >
<span style="font-size: 14px !important; margin: 0.1em 16px 0.1em 0;" class="modal-title wfb-title">The World Factbook</span><span style="float: right; margin-top: -4px;">
<button type="button" class="close" title="close" data-dismiss="modal">&times;</button></span>
</div>
<div class="wfb-modal-body">
...
<div id='field'
HTML

    m = Factbook::Sanitizer::REL_AFFILIATION_CATEGORY_REGEX.match( html )
    pp m

    refute_nil m, 'REL_AFFILIATION_CATEGORY_REGEX expected to match'
  end # method test_rel_affiliation

end # class TestSanitizerRegex
metadata ADDED
@@ -0,0 +1,196 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: factbook-readers
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-11-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logutils
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: csvreader
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: webget
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdoc
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '4.0'
76
+ - - "<"
77
+ - !ruby/object:Gem::Version
78
+ version: '7'
79
+ type: :development
80
+ prerelease: false
81
+ version_requirements: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '4.0'
86
+ - - "<"
87
+ - !ruby/object:Gem::Version
88
+ version: '7'
89
+ - !ruby/object:Gem::Dependency
90
+ name: hoe
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '3.22'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '3.22'
103
+ description: factbook-readers - turn the world factbook country profile pages into
104
+ open structured data e.g. JSON
105
+ email: openmundi@googlegroups.com
106
+ executables: []
107
+ extensions: []
108
+ extra_rdoc_files:
109
+ - CHANGELOG.md
110
+ - Manifest.txt
111
+ - README.md
112
+ files:
113
+ - CHANGELOG.md
114
+ - Manifest.txt
115
+ - README.md
116
+ - Rakefile
117
+ - data/attributes.yml
118
+ - data/categories.csv
119
+ - data/codes.csv
120
+ - data/codesxref.csv
121
+ - data/comparisons.csv
122
+ - lib/factbook-readers.rb
123
+ - lib/factbook-readers/attributes.rb
124
+ - lib/factbook-readers/builder.rb
125
+ - lib/factbook-readers/builder_item.rb
126
+ - lib/factbook-readers/builder_json.rb
127
+ - lib/factbook-readers/codes.rb
128
+ - lib/factbook-readers/comparisons.rb
129
+ - lib/factbook-readers/counter.rb
130
+ - lib/factbook-readers/normalize.rb
131
+ - lib/factbook-readers/page.rb
132
+ - lib/factbook-readers/page_info.rb
133
+ - lib/factbook-readers/reader_json.rb
134
+ - lib/factbook-readers/sanitizer.rb
135
+ - lib/factbook-readers/sect.rb
136
+ - lib/factbook-readers/subsect.rb
137
+ - lib/factbook-readers/table.rb
138
+ - lib/factbook-readers/utils.rb
139
+ - lib/factbook-readers/utils_info.rb
140
+ - lib/factbook-readers/version.rb
141
+ - lib/factbook/readers.rb
142
+ - test/data/au.html
143
+ - test/data/au.yml
144
+ - test/data/be.html
145
+ - test/data/be.yml
146
+ - test/data/json/au.json
147
+ - test/data/src/ag.html
148
+ - test/data/src/au-2015-09-24.html
149
+ - test/data/src/au.html
150
+ - test/data/src/be-2015-09-24.html
151
+ - test/data/src/be.html
152
+ - test/helper.rb
153
+ - test/test_attribs.rb
154
+ - test/test_attribs_def.rb
155
+ - test/test_builder.rb
156
+ - test/test_codes.rb
157
+ - test/test_comparisons.rb
158
+ - test/test_convert.rb
159
+ - test/test_counter.rb
160
+ - test/test_fields.rb
161
+ - test/test_importer.rb
162
+ - test/test_item_builder.rb
163
+ - test/test_json.rb
164
+ - test/test_json_builder.rb
165
+ - test/test_normalize.rb
166
+ - test/test_page.rb
167
+ - test/test_sanitizer.rb
168
+ - test/test_sanitizer_regex.rb
169
+ homepage: https://github.com/factbook/factbook
170
+ licenses:
171
+ - Public Domain
172
+ metadata: {}
173
+ post_install_message:
174
+ rdoc_options:
175
+ - "--main"
176
+ - README.md
177
+ require_paths:
178
+ - lib
179
+ required_ruby_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ version: 2.2.2
184
+ required_rubygems_version: !ruby/object:Gem::Requirement
185
+ requirements:
186
+ - - ">="
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ requirements: []
190
+ rubyforge_project:
191
+ rubygems_version: 2.5.2
192
+ signing_key:
193
+ specification_version: 4
194
+ summary: factbook-readers - turn the world factbook country profile pages into open
195
+ structured data e.g. JSON
196
+ test_files: []