factbook 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +0 -61
  4. data/README.md +8 -506
  5. data/Rakefile +4 -9
  6. data/lib/factbook.rb +4 -64
  7. metadata +6 -124
  8. data/data/attributes.yml +0 -337
  9. data/data/categories.csv +0 -164
  10. data/data/codes.csv +0 -262
  11. data/data/codesxref.csv +0 -280
  12. data/data/comparisons.csv +0 -75
  13. data/lib/factbook/almanac.rb +0 -72
  14. data/lib/factbook/attributes.rb +0 -74
  15. data/lib/factbook/builder.rb +0 -212
  16. data/lib/factbook/builder_item.rb +0 -126
  17. data/lib/factbook/builder_json.rb +0 -79
  18. data/lib/factbook/codes.rb +0 -119
  19. data/lib/factbook/comparisons.rb +0 -50
  20. data/lib/factbook/counter.rb +0 -48
  21. data/lib/factbook/db/importer.rb +0 -92
  22. data/lib/factbook/db/models.rb +0 -11
  23. data/lib/factbook/db/schema.rb +0 -36
  24. data/lib/factbook/normalize.rb +0 -43
  25. data/lib/factbook/page.rb +0 -148
  26. data/lib/factbook/page_info.rb +0 -12
  27. data/lib/factbook/reader_json.rb +0 -51
  28. data/lib/factbook/sanitizer.rb +0 -178
  29. data/lib/factbook/sect.rb +0 -29
  30. data/lib/factbook/subsect.rb +0 -18
  31. data/lib/factbook/table.rb +0 -52
  32. data/lib/factbook/utils.rb +0 -85
  33. data/lib/factbook/utils_info.rb +0 -129
  34. data/lib/factbook/version.rb +0 -21
  35. data/script/almanac.rb +0 -48
  36. data/script/attributes.rb +0 -34
  37. data/script/build.rb +0 -28
  38. data/script/counter.rb +0 -145
  39. data/script/json.rb +0 -19
  40. data/script/testbr.rb +0 -33
  41. data/script/testcodes.rb +0 -11
  42. data/test/data/au.html +0 -579
  43. data/test/data/au.yml +0 -8
  44. data/test/data/be.html +0 -596
  45. data/test/data/be.yml +0 -8
  46. data/test/data/json/au.json +0 -892
  47. data/test/data/src/ag.html +0 -716
  48. data/test/data/src/au-2015-09-24.html +0 -2006
  49. data/test/data/src/au.html +0 -658
  50. data/test/data/src/be-2015-09-24.html +0 -2011
  51. data/test/data/src/be.html +0 -648
  52. data/test/helper.rb +0 -11
  53. data/test/test_attribs.rb +0 -87
  54. data/test/test_attribs_def.rb +0 -20
  55. data/test/test_builder.rb +0 -35
  56. data/test/test_codes.rb +0 -76
  57. data/test/test_comparisons.rb +0 -19
  58. data/test/test_convert.rb +0 -30
  59. data/test/test_counter.rb +0 -31
  60. data/test/test_fields.rb +0 -52
  61. data/test/test_importer.rb +0 -56
  62. data/test/test_item_builder.rb +0 -99
  63. data/test/test_json.rb +0 -45
  64. data/test/test_json_builder.rb +0 -25
  65. data/test/test_normalize.rb +0 -23
  66. data/test/test_page.rb +0 -38
  67. data/test/test_sanitizer.rb +0 -39
  68. data/test/test_sanitizer_regex.rb +0 -89
@@ -1,79 +0,0 @@
1
- # encoding: utf-8
2
-
3
module Factbook

  ######
  # json builder -- lets us rebuild a page from "dumped" json (instead of parsing html page)
  #
  #  the json document gets walked two levels deep:
  #    every top-level key/value pair turns into a section (Sect) and
  #    every key/value pair inside it turns into a subsection (Subsect)

  class JsonBuilder
    include LogUtils::Logging
    include NormalizeHelper    ## e.g. normalize_category


    ## Reads the json document stored in path and builds the page from it.
    ##   note: always reads the file as utf-8 (independent of the platform's
    ##         default encoding) and drops a leading byte order mark (bom) if present
    def self.from_file( path )
      text = File.open( path, 'r:bom|utf-8' ) { |file| file.read }
      from_string( text )
    end

    ## Builds the page from the json document passed in as a string.
    def self.from_string( text )
      new( text )
    end


    attr_reader :text,     ## json document as passed in (string)
                :json,     ## parsed json document
                :info,     ## not used yet -- page info incl. country_name, region_name, last_updated etc.
                :errors,   ## not used yet -- encoding errors etc.
                :sects     ## sections (in document order)


    def initialize( text )
      @text   = text
      @json   = JSON.parse( text )

      @info   = nil    ## fix/todo: sorry - for now no page info (use header in json - why? why not??)
      @errors = []     ## fix/todo: sorry - for now no errors possible/tracked

      @sects  = @json.map { |title, subsects| build_sect( title, subsects ) }
    end


    private

    ## Builds one section from its title and its (title => data) subsection pairs.
    def build_sect( title, subsects )
      sect          = Sect.new
      sect.title    = title
      sect.subsects = subsects.map { |subtitle, data| build_subsect( subtitle, data ) }
      sect
    end

    ## Builds one subsection from its title and its data.
    def build_subsect( title, data )
      subsect       = Subsect.new
      subsect.title = title
      subsect.data  = normalize_keys( data )
      subsect
    end

    ## note: run data hash through normalize_category (again);
    ##   anything that is not a hash gets passed along as is
    def normalize_keys( data )
      return data unless data.is_a?( Hash )

      data.each_with_object( {} ) do |(key, value), normalized|
        normalized[ normalize_category( key ) ] = value
      end
    end

  end # class JsonBuilder

end # module Factbook
@@ -1,119 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ##
4
- # note:
5
- # the factbook category/region for world is other entities (on FAQ) and oceans in page
6
- # changed to world
7
-
8
-
9
module Factbook

  ##
  # Codes -- list of code records (code, name, category, region)
  #   that can get filtered by category and/or region;
  #   every filter returns a new Codes object, thus, filters can get chained
  #   e.g. codes.countries.europe

  class Codes
    include Enumerable    ## note: each yields the code records (structs)

    Code = Struct.new( :code,       ## todo: add notes (country affiliation) - why? why not??
                       :name,
                       :category,   ## e.g. Countries, Other, Oceans, World, Dependencies, etc.
                       :region      ## e.g. Europe, Oceans, etc.
                     )

    ## Reads the code records from the csv file stored in path;
    ##   requires the header columns Code and Name; Category and Region are optional.
    def self.from_csv( path )
      ###
      #  note:
      #   if you use quotes - NO leading spaces allowed e.g.
      #     use au,"Austria",... and NOT
      #         au, "Austria", ...
      #
      #   for headers - NO leading spaces allowed e.g.
      #     use Code,Name,Category,Region,... and NOT
      #         Code, Name, Category, Region, ...

      rows = CSV.read( path, headers: true )

      recs = rows.map do |row|
        rec = Code.new
        rec.code = row['Code'].strip    ## remove leading n trailing whitespaces
        rec.name = row['Name'].strip

        ## note: for now category and region are optional
        rec.category = row['Category'].strip  if row['Category']
        rec.region   = row['Region'].strip    if row['Region']
        rec
      end

      new( recs )
    end

    def initialize( codes )
      @codes = codes
    end

    def size() @codes.size; end

    ## Yields every code record; returns an enumerator if no block is passed in.
    def each( &block )
      return enum_for( :each )  unless block_given?
      @codes.each( &block )
    end

    def to_a
      @codes.map { |code| code.code }   ## return array of codes (and NOT of code records)
    end

    ##  def all() self.to_a; end    ## note: alias for to_a - use - why? why not??

    ## "pre-defined" convenience shortcuts
    def countries()       category 'Countries'; end
    def world()           category 'World'; end
    def oceans()          category 'Oceans'; end
    def misc()            category 'Miscellaneous'; end
    def others()          category 'Other'; end
    def dependencies()    category 'Dependencies'; end
    def dependencies_us() category 'Dependencies (United States)'; end
    ## fix/todo: add all dependencies uk (or gb?), fr,cn,au,nz,no,dk,etc.

    def europe()                      region 'Europe'; end
    def south_asia()                  region 'South Asia'; end
    def central_asia()                region 'Central Asia'; end
    def east_n_southeast_asia()       region 'East & Southeast Asia'; end
    def middle_east()                 region 'Middle East'; end
    def africa()                      region 'Africa'; end
    def north_america()               region 'North America'; end
    def central_america_n_caribbean() region 'Central America and Caribbean'; end
    def south_america()               region 'South America'; end
    def australia_oceania()           region 'Australia-Oceania'; end
    def antarctica()                  region 'Antarctica'; end

    ## note: keep the (misspelled) names of the first release for backwards compatibility
    alias_method :east_n_souteast_asia, :east_n_southeast_asia
    alias_method :antartica,            :antarctica

    ## note: regions oceans and world - same as category oceans and world
    ##    use oceans_ii or world_ii or something ??
    ##    use category('World') n region('World')
    ##    use category('Oceans') n region('Oceans')


    ## Returns (new) Codes with all records whose category includes query (ignoring case).
    def category( query )  filter_by( :category, query ); end

    ## Returns (new) Codes with all records whose region includes query (ignoring case).
    def region( query )    filter_by( :region, query ); end


    private

    ## todo/future: allow passing in of regex too (not just string)
    ##  note: the query gets escaped e.g. Dependencies (France) turns into
    ##         Dependencies \(France\) etc.
    def filter_by( attribute, query )
      filter_regex = /#{Regexp.escape(query)}/i
      codes = @codes.select do |code|
        value = code[ attribute ]
        value ? filter_regex.match( value ) : false   ## note: allow nil; will fail on search
      end
      Codes.new( codes )   ## return new Codes obj for easy-chaining
    end

  end  # class Codes

end # module Factbook
119
-
@@ -1,50 +0,0 @@
1
- # encoding: utf-8
2
-
3
module Factbook

  ##
  # Comparisons -- list of comparison records (num, category, name)

  class Comparisons
    include Enumerable    ## note: each yields the comparison records (structs)

    Comparison = Struct.new( :num,        ### todo: use no or id or something - why? why not?
                             :category,   ## e.g. Geography, People, Economy, etc.
                             :name
                           )

    ## Reads the comparison records from the csv file stored in path;
    ##   requires the header columns Num, Category and Name.
    def self.from_csv( path )
      rows = CSV.read( path, headers: true )

      recs = rows.map do |row|
        rec = Comparison.new
        rec.num      = row['Num'].strip.to_i    ## remove leading n trailing whitespaces
        rec.category = row['Category'].strip
        rec.name     = row['Name'].strip
        rec
      end

      new( recs )
    end

    def initialize( comps )
      @comps = comps
    end

    def size() @comps.size; end

    ## Yields every comparison record; returns an enumerator if no block is passed in.
    def each( &block )
      return enum_for( :each )  unless block_given?
      @comps.each( &block )
    end

    def to_a
      @comps.map { |comp| comp.num }   ## return array of nums -- return something else - why? why not?
    end

  end  # class Comparisons

end # module Factbook
50
-
@@ -1,48 +0,0 @@
1
- # encoding: utf-8
2
-
3
module Factbook

  ##
  # Counter -- counts how many pages use a key (that holds a hash) in the page data;
  #   the counts get collected in a hash tree that mirrors the page data e.g.
  #
  #     { 'Geography' => { count: 2, codes: 'au be',
  #                        'Area' => { count: 2, codes: 'au be' }}}

  class Counter

    ## note: the country codes get tracked for the first entries only;
    ##   once a key gets counted more often the codes get dropped
    MAX_CODES = 9

    attr_reader :data

    def initialize
      @data = {}
    end

    ## Adds the keys of the passed in page to the counts in data;
    ##   expects page.data (hash) and page.info.country_code.
    def count( page )
      ## walk page data hash
      #   add nodes to data
      walk( page, page.data, @data )
    end


    private

    def walk( page, hin, hout )
      hin.each do |key, value|
        next unless value.is_a?( Hash )    ## note: only keys holding a hash get counted

        stats = hout[key] || { count: 0, codes: '' }
        stats[ :count ] += 1

        if stats[ :count ] > MAX_CODES
          ## delete codes if larger (threshold) than x (e.g. 9)
          stats.delete( :codes )
        else
          ## note: build a new string (do NOT append to the string literal in place);
          ##   works with frozen string literals too
          codes     = stats[ :codes ]
          separator = codes.empty? ? '' : ' '     ## add separator (space for now)
          stats[ :codes ] = "#{codes}#{separator}#{page.info.country_code}"
        end

        hout[key] = stats
        walk( page, value, stats )
      end
    end

  end # class Counter

end # module Factbook
@@ -1,92 +0,0 @@
1
- # encoding: utf-8
2
-
3
module Factbook

  ##
  # Importer -- converts some key figures of a page (area, population, rates)
  #   from text to numbers and saves them in the Fact record
  #   for the page's country code

  class Importer

    ## note: percent and rate allow a leading minus (-) e.g.
    ##   -0.55% or -5.56 migrant(s)/1,000 population
    RATE_PER_THOUSAND_REGEX = %r{(-?[0-9.]+) [a-z()]+/1,000}
    PERCENT_REGEX           = /(-?[0-9.]+)%/
    NUM_REGEX               = /([0-9,.]+)/
    SQ_KM_REGEX             = /([0-9,.]+) sq km/


    ## Creates the Fact record for the page's country code or
    ##   updates it if there is a record with the code already.
    def import( page )

      ## note: assumes active connection

      code = page.info.country_code
      name = page.info.country_name

      attribs = {
        name:       name,
        area:       sq_km( page.area ),         # e.g. 83,871 sq km
        area_land:  sq_km( page.area_land ),    # e.g. 82,445 sq km
        area_water: sq_km( page.area_water ),   # e.g. 1,426 sq km

        population:        num( page.population ),                   # e.g. 8,665,550 (July 2015 est.)
        population_growth: percent( page.population_growth ),        # e.g. 0.55% (2015 est.)
        birth_rate:        rate_per_thousand( page.birth_rate ),     # e.g. 9.41 births/1,000 population (2015 est.)
        death_rate:        rate_per_thousand( page.death_rate ),     # e.g. 9.42 deaths/1,000 population (2015 est.)
        migration_rate:    rate_per_thousand( page.migration_rate ), # e.g. 5.56 migrant(s)/1,000 population (2015 est.)
      }

      rec = Fact.find_by( code: code )
      if rec.nil?     ## create (new) record
        rec = Fact.new
        attribs[ :code ] = code
        puts "create fact record #{code}/#{name}:"
      else            ## update (existing) record
        puts "update fact record #{code}/#{name}:"
      end

      puts " #{attribs.inspect}"

      ## note: update_attributes! got removed in newer activerecord versions;
      ##   use update! if available
      if rec.respond_to?( :update! )
        rec.update!( attribs )
      else
        rec.update_attributes!( attribs )
      end
    end


    ## Returns the rate as a float or nil (plus a warning) if the text does not match.
    def rate_per_thousand( text )
      # e.g.  9.41 births/1,000 population (2015 est.)
      #       9.42 deaths/1,000 population (2015 est.)
      #      -5.56 migrant(s)/1,000 population (2015 est.)

      if text =~ RATE_PER_THOUSAND_REGEX
        Regexp.last_match( 1 ).to_f
      else
        puts "*** warn: unknown rate <name>/1,000 format (no match): >#{text}<"
        nil   ## return nil
      end
    end

    ## Returns the (first) number as an integer or nil (plus a warning) if the text does not match.
    def num( text )
      # e.g. 8,665,550 (July 2015 est.)

      if text =~ NUM_REGEX
        Regexp.last_match( 1 ).gsub( ',', '' ).to_i   ## note: remove commas (,) if present
      else
        puts "*** warn: unknown number format (no match): >#{text}<"
        nil   ## return nil
      end
    end

    ## Returns the percentage as a float or nil (plus a warning) if the text does not match.
    def percent( text )
      # e.g. 0.55% (2015 est.)
      #     -0.55% (2015 est.)

      if text =~ PERCENT_REGEX
        Regexp.last_match( 1 ).to_f
      else
        puts "*** warn: unknown percent format (no match): >#{text}<"
        nil   ## return nil
      end
    end

    ## Returns the area as an integer or nil (plus a warning) if the text does not match.
    def sq_km( text )
      # e.g. 83,871 sq km
      ## todo - check vatican - uses float e.g. 0.44 ?? add support?
      ##   note: for now the fraction gets cut off (to_i) e.g. 0.44 => 0

      if text =~ SQ_KM_REGEX
        Regexp.last_match( 1 ).gsub( ',', '' ).to_i   ## note: remove commas (,) if present
      else
        puts "*** warn: unknown sq km format (no match): >#{text}<"
        nil   ## return nil
      end
    end

  end # class Importer

end # module Factbook
92
-
@@ -1,11 +0,0 @@
1
- # encoding: utf-8
2
-
3
module Factbook

  ##
  # Fact -- activerecord model; adds nothing of its own
  #   (no validations, no associations, no methods)
  #   note: presumably maps to the facts table set up by CreateDb - verify

  class Fact < ActiveRecord::Base; end

end # module Factbook
@@ -1,36 +0,0 @@
1
- # encoding: utf-8
2
-
3
module Factbook

  ##
  # CreateDb -- sets up the database schema, that is, the facts table
  #   note: assumes an active (activerecord) connection

  class CreateDb

    ## Creates the facts table (one record per country code).
    def up
      ActiveRecord::Schema.define do
        create_table :facts do |t|
          ## code - country code e.g. au
          ## name - country name e.g. Austria
          t.string :code, :name, null: false

          ## area       - e.g. 83,871 sq km
          ## area_land  - e.g. 82,445 sq km   --use float - why? why not?
          ## area_water - e.g. 1,426 sq km
          ## population - e.g. 8,665,550 (July 2015 est.)
          t.integer :area, :area_land, :area_water, :population

          ## population_growth - e.g. 0.55% (2015 est.)
          ## birth_rate        - e.g. 9.41 births/1,000 population (2015 est.)
          ## death_rate        - e.g. 9.42 deaths/1,000 population (2015 est.)
          ## migration_rate    - e.g. 5.56 migrant(s)/1,000 population (2015 est.)
          t.float :population_growth, :birth_rate, :death_rate, :migration_rate

          t.timestamps
        end
      end # block Schema.define
    end # method up

  end # class CreateDb

end # module Factbook
@@ -1,43 +0,0 @@
1
- # encoding: utf-8
2
-
3
module Factbook
  module NormalizeHelper

    ## Returns the "canonical" name for a category (key):
    ##   strips leading and trailing whitespace and the trailing colon(s) and
    ##   maps known typos, case and spelling variants to a single name.
    def normalize_category( text )
      ## remove trailing : if present
      ##   note: fix typos/errors with double colons too e.g. note:: (instead of note:), thus, use :+
      name = text.strip.sub( /:+\z/, '' ).strip

      #######################################
      ### special cases
      case name
      ## typos e.g ntoe => use note
      when 'ntoe'                           then 'note'
      when 'investment if fixed capital'    then 'investment in fixed capital'
      ## downcase
      when 'Lowest point'                   then 'lowest point'
      when 'Chief of state'                 then 'chief of state'
      ## spelling variant (use more popular one)
      when 'signed but not ratified'        then 'signed, but not ratified'
      when 'vectorborne disease'            then 'vectorborne diseases'
      when 'water contact diseases'         then 'water contact disease'
      when 'food or waterborne disease'     then 'food or waterborne diseases'
      when 'geographical coordinates'       then 'geographic coordinates'
      when 'notes'                          then 'note'
      when 'refugees (countries of origin)' then 'refugees (country of origin)'
      ## border countries (8): -- remove (x) counter
      when /\Aborder countries/             then 'border countries'
      else name
      end
    end

  end # module NormalizeHelper
end # module Factbook