factbook 0.1.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Manifest.txt +34 -22
- data/README.md +8 -3
- data/Rakefile +2 -263
- data/data/codes.csv +262 -0
- data/data/comparisons.csv +75 -0
- data/lib/factbook/builder.rb +214 -0
- data/lib/factbook/builder_item.rb +93 -0
- data/lib/factbook/codes.rb +119 -0
- data/lib/factbook/comparisons.rb +50 -0
- data/lib/factbook/page.rb +103 -303
- data/lib/factbook/sanitizer.rb +214 -0
- data/lib/factbook/sect.rb +29 -196
- data/lib/factbook/subsect.rb +18 -0
- data/lib/factbook/table.rb +52 -0
- data/lib/factbook/utils.rb +85 -0
- data/lib/factbook/utils_info.rb +102 -0
- data/lib/factbook/version.rb +4 -3
- data/lib/factbook.rb +23 -1
- data/test/data/au.html +579 -0
- data/test/data/au.yml +8 -0
- data/test/data/be.html +596 -0
- data/test/data/be.yml +8 -0
- data/test/data/src/au.html +2006 -0
- data/test/data/src/be.html +2011 -0
- data/test/helper.rb +0 -4
- data/test/test_builder.rb +37 -0
- data/test/test_codes.rb +76 -0
- data/test/test_comparisons.rb +19 -0
- data/test/test_fields.rb +21 -18
- data/test/test_item_builder.rb +99 -0
- data/test/test_json.rb +17 -20
- data/test/test_page.rb +18 -10
- data/test/test_sanitizer.rb +35 -0
- metadata +68 -49
- data/.gemtest +0 -0
- data/test/data/countrytemplate_au.html +0 -4179
- data/test/data/countrytemplate_be.html +0 -4260
- data/test/data/countrytemplate_br.html +0 -4366
- data/test/data/countrytemplate_ee.html +0 -2999
- data/test/data/countrytemplate_ls.html +0 -2728
- data/test/data/countrytemplate_mx.html +0 -4397
- data/test/data/countrytemplate_vt.html +0 -1726
- data/test/data/countrytemplate_xx.html +0 -2898
- data/test/test_page_old.rb +0 -478
- data/test/test_strip.rb +0 -66
@@ -0,0 +1,75 @@
|
|
1
|
+
Num,Category,Name
|
2
|
+
2147,Geography,Area
|
3
|
+
2119,People and Society,Population
|
4
|
+
2002,People and Society,Population growth rate
|
5
|
+
2054,People and Society,Birth rate
|
6
|
+
2066,People and Society,Death rate
|
7
|
+
2112,People and Society,Net migration rate
|
8
|
+
2223,People and Society,Maternal mortality rate
|
9
|
+
2091,People and Society,Infant mortality rate
|
10
|
+
2102,People and Society,Life expectancy at birth
|
11
|
+
2127,People and Society,Total fertility rate
|
12
|
+
2225,People and Society,Health expenditures
|
13
|
+
2155,People and Society,HIV/AIDS - adult prevalence rate
|
14
|
+
2156,People and Society,HIV/AIDS - people living with HIV/AIDS
|
15
|
+
2157,People and Society,HIV/AIDS - deaths
|
16
|
+
2228,People and Society,Obesity - adult prevalence rate
|
17
|
+
2224,People and Society,Children under the age of 5 years underweight
|
18
|
+
2206,People and Society,Education expenditures
|
19
|
+
2229,People and Society,"Unemployment, youth ages 15-24"
|
20
|
+
2001,Economy,GDP (purchasing power parity)
|
21
|
+
2003,Economy,GDP - real growth rate
|
22
|
+
2004,Economy,GDP - per capita (PPP)
|
23
|
+
2260,Economy,Gross national saving
|
24
|
+
2089,Economy,Industrial production growth rate
|
25
|
+
2095,Economy,Labor force
|
26
|
+
2129,Economy,Unemployment rate
|
27
|
+
2172,Economy,Distribution of family income - Gini index
|
28
|
+
2221,Economy,Taxes and other revenues
|
29
|
+
2222,Economy,Budget surplus (+) or deficit (-)
|
30
|
+
2186,Economy,Public debt
|
31
|
+
2092,Economy,Inflation rate (consumer prices)
|
32
|
+
2207,Economy,Central bank discount rate
|
33
|
+
2208,Economy,Commercial bank prime lending rate
|
34
|
+
2214,Economy,Stock of narrow money
|
35
|
+
2215,Economy,Stock of broad money
|
36
|
+
2211,Economy,Stock of domestic credit
|
37
|
+
2200,Economy,Market value of publicly traded shares
|
38
|
+
2187,Economy,Current account balance
|
39
|
+
2078,Economy,Exports
|
40
|
+
2087,Economy,Imports
|
41
|
+
2188,Economy,Reserves of foreign exchange and gold
|
42
|
+
2079,Economy,Debt - external
|
43
|
+
2198,Economy,Stock of direct foreign investment - at home
|
44
|
+
2199,Economy,Stock of direct foreign investment - abroad
|
45
|
+
2232,Energy,Electricity - production
|
46
|
+
2233,Energy,Electricity - consumption
|
47
|
+
2234,Energy,Electricity - exports
|
48
|
+
2235,Energy,Electricity - imports
|
49
|
+
2236,Energy,Electricity - installed generating capacity
|
50
|
+
2237,Energy,Electricity - from fossil fuels
|
51
|
+
2239,Energy,Electricity - from nuclear fuels
|
52
|
+
2238,Energy,Electricity - from hydroelectric plants
|
53
|
+
2240,Energy,Electricity - from other renewable sources
|
54
|
+
2241,Energy,Crude oil - production
|
55
|
+
2242,Energy,Crude oil - exports
|
56
|
+
2243,Energy,Crude oil - imports
|
57
|
+
2244,Energy,Crude oil - proved reserves
|
58
|
+
2245,Energy,Refined petroleum products - production
|
59
|
+
2246,Energy,Refined petroleum products - consumption
|
60
|
+
2247,Energy,Refined petroleum products - exports
|
61
|
+
2248,Energy,Refined petroleum products - imports
|
62
|
+
2249,Energy,Natural gas - production
|
63
|
+
2250,Energy,Natural gas - consumption
|
64
|
+
2251,Energy,Natural gas - exports
|
65
|
+
2252,Energy,Natural gas - imports
|
66
|
+
2253,Energy,Natural gas - proved reserves
|
67
|
+
2150,Communications,Telephones - fixed lines
|
68
|
+
2151,Communications,Telephones - mobile cellular
|
69
|
+
2153,Communications,Internet users
|
70
|
+
2053,Transportation,Airports
|
71
|
+
2121,Transportation,Railways
|
72
|
+
2085,Transportation,Roadways
|
73
|
+
2093,Transportation,Waterways
|
74
|
+
2108,Transportation,Merchant marine
|
75
|
+
2034,Military,Military expenditures
|
@@ -0,0 +1,214 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Factbook

  ##
  # Builds a Page (sections -> subsections -> data hashes) from the raw html
  # of a single profile page.
  #
  # Processing steps (see #initialize):
  #   1. sanitize the raw html (Sanitizer#sanitize returns the cleaned-up html,
  #      the page info and a list of errors)
  #   2. replace section / subsection headings with unified text markers
  #      (@SECTION{..} and @SUBSECTION{..})
  #   3. split the marked-up html on these markers
  #   4. build one Sect per section and one Subsect per subsection;
  #      every subsection body gets converted into a hash by ItemBuilder
  #
  # note: trace/debug output goes through logger (from the LogUtils::Logging mixin)
  #       and NOT straight to stdout.

  class Builder    ## todo: change to PageBuilder ???
    include LogUtils::Logging


=begin
    def self.from_cc( cc, opts={} )   ## rename to from_file_for_country() or from_file_for_cc() or something - why?? why not??
      ## check/todo: rename input_dir to just dir or to include ?
      ##    (there's no output_dir)?? - why? why not?
      input_dir = opts[:input_dir] || '.'
      self.from_file( "#{input_dir}/#{cc}.html" )
    end
=end


    ##
    # Reads the html page stored in path and returns a new Builder for it.
    def self.from_file( path )
      html_ascii = File.read( path )    ## fix/todo: use ASCII8BIT/binary reader !!!!!
      new( html_ascii )
    end


    attr_reader :html_ascii,    ## full "original" 1:1 page in "original/ascii8/binary" encoding
                :html,          ## utf-8 encoded profile
                :html_debug,    ## html w/ mapping markers - rename to html_markers - why? why not?
                :page_info,     ## incl. country_name, region_name, last_updated etc.
                :errors,        ## encoding errors etc.
                :page

    ##
    # html_ascii - the full, unprocessed html page (as a string)
    #
    # Runs the complete pipeline; the result is available via #page
    # (and the intermediate steps via #html, #html_debug, #page_info, #errors).
    def initialize( html_ascii )
      @html_ascii = html_ascii

      ## todo/fix: use/assume windows 12xx?? encoding - change encoding to utf-8 (from binary/ascii8bit)
      @html, @page_info, @errors = Sanitizer.new.sanitize( @html_ascii )

      @html_debug = map_subsects( map_sects( @html ) )

      html_sects = split_sects( @html_debug )
      logger.debug "found #{html_sects.size} section(s)"

      @page = build_page( html_sects )
    end


    ##
    # Converts section titles from <h2>..</h2> to the "unified" marker
    # @SECTION{title} and returns the resulting html (the input is not changed).
    #
    # e.g.
    #   <h2 sectiontitle='Introduction' ccode='au'>Introduction :: <span class='region'>AUSTRIA </span></h2>
    #   <h2>Introduction</h2>
    # both become
    #   @SECTION{Introduction}
    def map_sects( html )
      title_regex = /<h2
                       (?:\s[^>]+)?    ## allow optional attributes in h2
                      >
                       \s*
                       ([^<>]+?)       ## note: use non-greedy; do NOT allow tags inside for now
                       \s*
                       (?:\s::\s
                          .+?          ## note: use non-greedy; allows tags inside
                       )?              ## strip optional name (e.g. :: AUSTRIA)
                      <\/h2>
                    /xim

      html.gsub( title_regex ) do |m|
        title = Regexp.last_match( 1 )
        logger.debug "** found section >#{title}<: >|#{m}|<"

        "\n\n@SECTION{#{title}}\n\n"
      end
    end


    ##
    # Converts subsection titles from <div id='field' class='category'>..</div>
    # to the "unified" marker @SUBSECTION{title} and returns the resulting html.
    #
    # e.g.
    #   <div id='field' class='category'>Disputes - international:</div>
    # becomes
    #   @SUBSECTION{Disputes - international:}
    def map_subsects( html )
      title_regex = /<div \s id='field'
                          \s class='category'>
                       \s*
                       (.+?)           ## note: use non-greedy; allows tags inside - why? why not
                       \s*
                      <\/div>
                    /xim

      html.gsub( title_regex ) do |m|
        title = Regexp.last_match( 1 )
        logger.debug "** found subsection >#{title}<: >|#{m}|<"

        "\n@SUBSECTION{#{title}}\n"
      end
    end


    ##
    # Splits the html into sections (divided by the @SECTION{..} markers).
    # The text before the first marker (prolog) gets dropped.
    #
    # Returns
    #   [[heading, subsects],
    #    [heading, subsects], ...]
    # where heading is the complete marker (e.g. "@SECTION{Economy}") and
    # subsects is the result of #split_subsects for the section body.
    def split_sects( html )
      ## note: "wrap" regex in a capture group (just one)
      ##   String#split will include all capture groups in the result array
      section_regex = /(@SECTION{.+?})/    ## note: use non-greedy -- check: need to escape {} ??

      chunks = html.split( section_regex )

      ## check if first item is a section or (html) prolog; if prolog (remove)
      chunks.shift unless chunks[0] =~ /@SECTION/    ## starts w/ @SECTION

      ## note: String#split drops a trailing empty string, that is, a heading at
      ##   the very end has no body (nil); use an empty body in that case
      ## todo: after cleanup prolog; remove @SECTION{} ?? - just keep title - why, why not??
      chunks.each_slice( 2 ).map do |heading, body|
        [heading, split_subsects( body.to_s )]
      end
    end


    ##
    # Splits the html of one section into subsections (divided by the
    # @SUBSECTION{..} markers). The text before the first marker (prolog)
    # gets dropped.
    #
    # Returns
    #   [[heading, body],
    #    [heading, body], ...]
    # where heading is the complete marker and body is always a string
    # (empty if nothing follows the heading).
    def split_subsects( html )
      ## note: "wrap" regex in a capture group (just one)
      ##   String#split will include all capture groups in the result array
      subsection_regex = /(@SUBSECTION{.+?})/    ## note: use non-greedy -- check: need to escape {} ??

      chunks = html.split( subsection_regex )

      ## check if first item is a subsection or (html) prolog; if prolog (remove)
      chunks.shift unless chunks[0] =~ /@SUBSECTION/    ## starts w/ @SUBSECTION

      ## note: pad a missing (nil) body of the last heading with an empty string
      chunks.each_slice( 2 ).map { |heading, body| [heading, body.to_s] }
    end


    private

    ## Builds the page from the [heading, subsects] pairs returned by split_sects.
    def build_page( html_sects )
      page = Page.new
      page.sects = html_sects.map { |heading, html_subsects| build_sect( heading, html_subsects ) }.compact
      page
    end

    ## Builds one section; returns nil (w/ a warning) if the heading holds no title.
    def build_sect( html_sect_head, html_subsects )
      ## get section title
      ##   @SECTION{Economy}  => Economy
      m = /@SECTION{(.+?)}/.match( html_sect_head )
      if m.nil?
        logger.warn "no section title found in >#{html_sect_head}<; skipping section"
        return nil
      end

      title = m[1].strip
      logger.debug "section >#{title}< - #{html_subsects.size} subsection(s)"

      sect = Sect.new
      sect.title    = title
      sect.subsects = html_subsects.map { |heading, body| build_subsect( heading, body ) }.compact
      sect
    end

    ## Builds one subsection; returns nil (w/ a warning) if the heading holds no title.
    def build_subsect( html_subsect_head, html_subsect_body )
      m = /@SUBSECTION{(.+?)}/.match( html_subsect_head )
      if m.nil?
        logger.warn "no subsection title found in >#{html_subsect_head}<; skipping subsection"
        return nil
      end

      title = m[1].strip.sub( /:\z/, '' ).strip    # remove trailing : if present
      logger.debug "  subsection >#{title}<"

      subsect = Subsect.new
      subsect.title = title
      subsect.data  = Factbook::ItemBuilder.new( html_subsect_body, title ).read
      subsect
    end

  end # class Builder


end # module Factbook
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Factbook

  ##
  # Converts the html snippet of one subsection (field) into a hash.
  #
  # Only the top-level divs of the snippet are used:
  #
  #   <div class='category_data'>text</div>
  #      - data for the entry started last; if no entry has been started yet
  #        the data goes into the implied top-level 'text' entry
  #   <div><span>key:</span><span>value</span></div>   (div without any class)
  #      - starts a new entry; the 2nd span (value) is optional
  #
  # Result e.g.
  #   { 'text'  => '...',
  #     'total' => { 'text' => '...' },
  #     'land'  => { 'text' => '...' } }
  #
  # Multiple data items for the same entry get joined with " ++ "
  # (or with a single space for the demographic profile).
  #
  # note: trace/debug output goes through logger (from the LogUtils::Logging mixin).

  class ItemBuilder    ## renameto ItemReader, ItemParser - why? why not??
    include LogUtils::Logging

    ##
    # html - html snippet (body of one subsection)
    # name - category/field name e.g. Area, Location, etc.
    def initialize( html, name )
      @html = html
      @name = name
    end

    ##
    # Returns the hash built from the html snippet.
    def read
      doc = Nokogiri::HTML.fragment( @html )

      data = {}
      last_node            = nil    ## track last hash (always use text key)
      last_node_data_count = 0

      ## note:
      ##   skip whitespace text nodes (e.g. \n\n etc); just use divs
      doc.children.filter( 'div' ).each do |child|
        css = child['class']

        if css == 'category_data'
          text = child.text    ## fix/todo: use strip
          logger.debug "category_data: >#{text}<"

          if last_node.nil?
            ## assume its the very first entry; use implied/auto-created category
            data['text'] = ''
            last_node            = data
            last_node_data_count = 0
          end

          last_node['text'] = merge_text( last_node['text'], text, last_node_data_count )
          last_node_data_count += 1

        elsif css.nil?    ## div without any class e.g. <div>..</div>
          ## assume category and category_data pair w/ spans
          spans = child.children.filter( 'span' )
          if spans.size > 2
            logger.warn "expected two (or one) spans; got #{spans.inspect}"
          end

          span_key   = spans[0]    ## assume 1st entry is span.category
          span_value = spans[1]    ## assume 2nd entry is span.category_data
                                   ##   allow optional category_data for now
          if span_key.nil?
            ## no key, no entry - skip (instead of failing on nil)
            logger.warn "item builder -- no span (key) found in #{child.inspect}"
            next
          end

          key   = span_key.text.strip.sub( /:\z/, '' ).strip    # remove trailing : if present
          value = span_value ? span_value.text : nil

          logger.debug "key: >#{key}<, value: >#{value}< : #{value.class.name}"

          ## start new pair
          last_node            = data[key] = { 'text' => value }
          last_node_data_count = value ? 1 : 0    ## note: set to 1 if value present
        else
          logger.warn "item builder -- unknown css class in #{child.inspect}"
        end
      end

      data
    end


    private

    ## Returns the new text for an entry after adding one more data item.
    ##   current - text collected so far (nil if the entry got started w/o a value)
    ##   text    - data item to add
    ##   count   - number of data items added to the entry so far
    def merge_text( current, text, count )
      if count == 0
        ### first category_data element?
        ##    note: check for nil too - an entry started w/ a key only has no text yet
        return text if current.nil? || current == ''

        ### possible ??? if data_count is zero - should not include any data
        ##   todo: issue warning here - why? why not??
        "#{current} #{text}"             ## append w/o separator
      elsif @name == 'demographic_profile' || @name == 'Demographic profile'
        "#{current} #{text}"             ## special case: append without (w/o) separator
      else
        "#{current} ++ #{text}"          ## append with ++ separator
      end
    end

  end # class ItemBuilder

end # module Factbook
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
##
|
4
|
+
# note:
|
5
|
+
# the factbook category/region for world is other entities (on FAQ) and oceans in page
|
6
|
+
# changed to world
|
7
|
+
|
8
|
+
|
9
|
+
module Factbook

  ##
  # List of factbook codes (e.g. au for Austria) incl. name, category and region.
  #
  # The filter methods (#category, #region and all shortcuts such as
  # #countries or #europe) return a new Codes object, that is, filters can
  # get chained e.g.
  #
  #   codes.countries.europe.to_a
  #
  # note: filters are case-insensitive and match anywhere in the value, e.g.
  #   category('Dependencies') includes 'Dependencies (United States)' too.

  class Codes

    Code = Struct.new( :code,       ## todo: add notes (country affiliation) - why? why not??
                       :name,
                       :category,   ## e.g. Countries, Other, Oceans, World, Dependencies, etc.
                       :region      ## e.g. Europe, Oceans, etc.
                     )

    ##
    # Reads the codes from the csv file in path; returns a new Codes object.
    #
    # The header line with the columns Code and Name is required;
    # the columns Category and Region are optional. Empty lines get skipped.
    #
    # note:
    #   if you use quotes - NO leading spaces allowed e.g.
    #     use au,"Austria",...  and NOT
    #         au, "Austria", ...
    #
    #   for headers - NO leading spaces allowed e.g.
    #     use Code,Name,Category,Region,...  and NOT
    #         Code, Name, Category, Region, ...
    def self.from_csv( path )
      rows = CSV.read( path, headers: true, skip_blanks: true )

      recs = rows.map do |row|
        rec = Code.new
        rec.code = row['Code'].strip    ## remove leading n trailing whitespaces
        rec.name = row['Name'].strip

        ## note: for now category and region are optional
        rec.category = row['Category'].strip  if row['Category']
        rec.region   = row['Region'].strip    if row['Region']
        rec
      end

      new( recs )
    end

    ##
    # codes - array of Code records
    def initialize( codes )
      @codes = codes
    end

    ## Returns the number of codes.
    def size
      @codes.size
    end

    ##
    # Yields every Code record; returns an Enumerator if no block is given.
    def each
      return enum_for( :each ) unless block_given?

      @codes.each { |code| yield( code ) }
    end

    ##
    # Returns an array of the codes only (e.g. ['au', 'be', ...]) - NOT the records.
    def to_a
      @codes.map { |code| code.code }
    end

    ## def all() self.to_a; end    ## note: alias for to_a - use - why? why not??

    ## "pre-defined" convenience shortcuts
    def countries()       category 'Countries'; end
    def world()           category 'World'; end
    def oceans()          category 'Oceans'; end
    def misc()            category 'Miscellaneous'; end
    def others()          category 'Other'; end
    def dependencies()    category 'Dependencies'; end
    def dependencies_us() category 'Dependencies (United States)'; end
    ## fix/todo: add all dependencies uk (or gb?), fr,cn,au,nz,no,dk,etc.

    def europe()                       region 'Europe'; end
    def south_asia()                   region 'South Asia'; end
    def central_asia()                 region 'Central Asia'; end
    def east_n_southeast_asia()        region 'East & Southeast Asia'; end
    def middle_east()                  region 'Middle East'; end
    def africa()                       region 'Africa'; end
    def north_america()                region 'North America'; end
    def central_america_n_caribbean()  region 'Central America and Caribbean'; end
    def south_america()                region 'South America'; end
    def australia_oceania()            region 'Australia-Oceania'; end
    def antarctica()                   region 'Antarctica'; end

    ## note: keep the (misspelled) names used so far for backwards compatibility
    alias_method :east_n_souteast_asia, :east_n_southeast_asia
    alias_method :antartica,            :antarctica

    ## note: regions oceans and world - same as category oceans and world
    ##   use oceans_ii or world_ii or something ??
    ##   use category('World')  n region('World')
    ##   use category('Oceans') n region('Oceans')


    ##
    # Returns a new Codes object with all codes whose category includes
    # query (case-insensitive); codes without a category never match.
    def category( query )
      filter_by( :category, query )
    end

    ##
    # Returns a new Codes object with all codes whose region includes
    # query (case-insensitive); codes without a region never match.
    def region( query )
      filter_by( :region, query )
    end


    private

    ## Shared filter for category and region; field is the name of the struct member.
    def filter_by( field, query )
      ## todo/future: allow passing in of regex too (not just string)
      ##   note: the query gets escaped e.g. Dependencies (France) becomes
      ##           Dependencies \(France\) etc.
      filter_regex = /#{Regexp.escape(query)}/i
      matches = @codes.select do |code|
        value = code[field]
        value ? filter_regex.match( value ) : false    ## note: allow nil; will fail on search
      end
      Codes.new( matches )    ## return new Codes obj for easy-chaining
    end

  end # class Codes

end # module Factbook
|
119
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Factbook

  ##
  # List of country comparisons (rank order pages); every comparison has a
  # number (e.g. 2147), a category (e.g. Geography) and a name (e.g. Area).

  class Comparisons

    Comparison = Struct.new( :num,       ### todo: use no or id or something - why? why not?
                             :category,  ## e.g. Geography, People, Economy, etc.
                             :name
                           )

    ##
    # Reads the comparisons from the csv file in path; returns a new
    # Comparisons object.
    #
    # The header line with the columns Num, Category and Name is required.
    # Empty lines get skipped.
    def self.from_csv( path )
      rows = CSV.read( path, headers: true, skip_blanks: true )

      recs = rows.map do |row|
        rec = Comparison.new
        rec.num      = row['Num'].strip.to_i    ## remove leading n trailing whitespaces
        rec.category = row['Category'].strip
        rec.name     = row['Name'].strip
        rec
      end

      new( recs )
    end

    ##
    # comps - array of Comparison records
    def initialize( comps )
      @comps = comps
    end

    ## Returns the number of comparisons.
    def size
      @comps.size
    end

    ##
    # Yields every Comparison record; returns an Enumerator if no block is given.
    def each
      return enum_for( :each ) unless block_given?

      @comps.each { |comp| yield( comp ) }
    end

    ##
    # Returns an array of the nums only (e.g. [2147, 2119, ...]) - NOT the records.
    def to_a
      @comps.map { |comp| comp.num }    ## return array of nums -- return something else - why? why not?
    end

  end # class Comparisons

end # module Factbook
|
50
|
+
|