csvhuman 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +3 -0
- data/README.md +137 -11
- data/lib/csvhuman.rb +15 -25
- data/lib/csvhuman/base.rb +17 -0
- data/lib/csvhuman/column.rb +46 -16
- data/lib/csvhuman/converter.rb +21 -0
- data/lib/csvhuman/reader.rb +30 -15
- data/lib/csvhuman/tag.rb +13 -2
- data/lib/csvhuman/version.rb +2 -2
- data/test/helper.rb +1 -0
- data/test/test_header_converter.rb +63 -0
- data/test/test_reader.rb +93 -5
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34cc6d7eb412bf02188cde5b26201cc1e5c8a46c
|
4
|
+
data.tar.gz: 4c04e8bc8678b19923a42f05d11114389ff78ab5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 404dc263217830a3dfc68e15114bd3e45a406774638574106708fed616a1fa9245becd6cab9a54889ef8335bffc0be5ca9324bb15779510fd4a5eea365156ecd
|
7
|
+
data.tar.gz: 8689b1d38c6edade697583786e595332c5a496285c778967ad8a35557290d0b08a62056b75fc194d995f571220e4836eadcbf6d5f9083ec92cc5d97b3877d9cd
|
data/Manifest.txt
CHANGED
@@ -3,11 +3,14 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/csvhuman.rb
|
6
|
+
lib/csvhuman/base.rb
|
6
7
|
lib/csvhuman/column.rb
|
8
|
+
lib/csvhuman/converter.rb
|
7
9
|
lib/csvhuman/reader.rb
|
8
10
|
lib/csvhuman/tag.rb
|
9
11
|
lib/csvhuman/version.rb
|
10
12
|
test/data/test.csv
|
11
13
|
test/helper.rb
|
14
|
+
test/test_header_converter.rb
|
12
15
|
test/test_reader.rb
|
13
16
|
test/test_tags.rb
|
data/README.md
CHANGED
@@ -79,30 +79,80 @@ resulting in:
|
|
79
79
|
"subsector" => "Subsector 1",
|
80
80
|
"org" => "Org 1",
|
81
81
|
"country" => "Country 1",
|
82
|
-
"sex+targeted" => [
|
82
|
+
"sex+targeted" => [100, 100],
|
83
83
|
"adm1" => "Region 1"},
|
84
84
|
{"sector+en" => "Health",
|
85
85
|
"subsector" => "Subsector 2",
|
86
86
|
"org" => "Org 2",
|
87
87
|
"country" => "Country 2",
|
88
|
-
"sex+targeted" => [
|
88
|
+
"sex+targeted" => [nil, nil],
|
89
89
|
"adm1" => "Region 2"},
|
90
90
|
{"sector+en" => "Education",
|
91
91
|
"subsector" => "Subsector 3",
|
92
92
|
"org" => "Org 3",
|
93
93
|
"country" => "Country 2",
|
94
|
-
"sex+targeted" => [
|
94
|
+
"sex+targeted" => [250, 300],
|
95
95
|
"adm1" => "Region 3"},
|
96
96
|
{"sector+en" => "WASH",
|
97
97
|
"subsector" => "Subsector 4",
|
98
98
|
"org" => "Org 1",
|
99
99
|
"country" => "Country 3",
|
100
|
-
"sex+targeted" => [
|
100
|
+
"sex+targeted" => [80, 95],
|
101
101
|
"adm1" => "Region 4"}]
|
102
102
|
```
|
103
103
|
|
104
104
|
|
105
|
-
|
105
|
+
### What about Enumerable?
|
106
|
+
|
107
|
+
Yes, every reader includes `Enumerable` and runs on `each`.
|
108
|
+
Use `new` or `open` without a block
|
109
|
+
to get the enumerator (iterator).
|
110
|
+
Example:
|
111
|
+
|
112
|
+
|
113
|
+
``` ruby
|
114
|
+
csv = CsvHuman.new( <<TXT ) ## or use HXL.new
|
115
|
+
What,,,Who,Where,For whom,
|
116
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
117
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
118
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
119
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
120
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
121
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
122
|
+
TXT )
|
123
|
+
it = csv.to_enum
|
124
|
+
pp it.next
|
125
|
+
# => {"sector+en" => "WASH",
|
126
|
+
# "subsector" => "Subsector 1",
|
127
|
+
# "org" => "Org 1",
|
128
|
+
# "country" => "Country 1",
|
129
|
+
# "sex+targeted" => [100, 100],
|
130
|
+
# "adm1" => "Region 1"}
|
131
|
+
|
132
|
+
|
133
|
+
# -or-
|
134
|
+
|
135
|
+
csv = CsvHuman.open( "./test.csv" ) # or use HXL.open
|
136
|
+
it = csv.to_enum
|
137
|
+
pp it.next
|
138
|
+
# => {"sector+en" => "WASH",
|
139
|
+
# "subsector" => "Subsector 1",
|
140
|
+
# "org" => "Org 1",
|
141
|
+
# "country" => "Country 1",
|
142
|
+
# "sex+targeted" => [100, 100],
|
143
|
+
# "adm1" => "Region 1"}
|
144
|
+
pp it.next
|
145
|
+
# => {"sector+en" => "Health",
|
146
|
+
# "subsector" => "Subsector 2",
|
147
|
+
# "org" => "Org 2",
|
148
|
+
# "country" => "Country 2",
|
149
|
+
# "sex+targeted" => [nil, nil],
|
150
|
+
# "adm1" => "Region 2"}
|
151
|
+
```
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
### More Ways to Use
|
106
156
|
|
107
157
|
``` ruby
|
108
158
|
csv = CsvHuman.new( recs )
|
@@ -110,8 +160,6 @@ csv.each do |rec|
|
|
110
160
|
pp rec
|
111
161
|
end
|
112
162
|
|
113
|
-
pp csv.read
|
114
|
-
|
115
163
|
|
116
164
|
CsvHuman.parse( recs ).each do |rec|
|
117
165
|
pp rec
|
@@ -136,8 +184,6 @@ hxl.each do |rec|
|
|
136
184
|
pp rec
|
137
185
|
end
|
138
186
|
|
139
|
-
pp hxl.read
|
140
|
-
|
141
187
|
|
142
188
|
HXL.parse( recs ).each do |rec|
|
143
189
|
pp rec
|
@@ -160,11 +206,91 @@ Note: More aliases for `CsvHuman`, `HXL`? Yes, you can use
|
|
160
206
|
|
161
207
|
|
162
208
|
|
163
|
-
|
209
|
+
### What about symbol keys for hashes?
|
210
|
+
|
211
|
+
Yes, you can use the `header_converter` keyword option.
|
212
|
+
Use `:symbol` for (auto-)converting header tags (strings) to symbols.
|
213
|
+
Note: the symbol converter will remove all hashtags (`#`) and spaces and
|
214
|
+
will change the plus (`+`) to underscore (`_`)
|
215
|
+
and remove all non-alphanumeric (e.g. `!?$%`) chars.
|
216
|
+
|
217
|
+
Example:
|
218
|
+
|
219
|
+
``` ruby
|
220
|
+
txt =<<TXT
|
221
|
+
What,,,Who,Where,For whom,
|
222
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
223
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
224
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
225
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
226
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
227
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
228
|
+
TXT
|
229
|
+
|
230
|
+
pp CsvHuman.parse( txt, :header_converter => :symbol ) ## or use HXL.parse
|
231
|
+
|
232
|
+
# -or-
|
233
|
+
|
234
|
+
options = { :header_converter => :symbol }
|
235
|
+
pp CsvHuman.parse( txt, options )
|
236
|
+
```
|
237
|
+
|
238
|
+
resulting in:
|
239
|
+
|
240
|
+
``` ruby
|
241
|
+
[{:sector_en => "WASH",
|
242
|
+
:subsector => "Subsector 1",
|
243
|
+
:org => "Org 1",
|
244
|
+
:country => "Country 1",
|
245
|
+
:sex_targeted => [100, 100],
|
246
|
+
:adm1 => "Region 1"},
|
247
|
+
# ...
|
248
|
+
{:sector_en => "WASH",
|
249
|
+
:subsector => "Subsector 4",
|
250
|
+
:org => "Org 1",
|
251
|
+
:country => "Country 3",
|
252
|
+
:sex_targeted => [80, 95],
|
253
|
+
:adm1 => "Region 4"}]
|
254
|
+
```
|
255
|
+
|
256
|
+
Built-in header converters include:
|
257
|
+
|
258
|
+
| Converter | Comments |
|
259
|
+
|--------------|---------------------|
|
260
|
+
| `:none` | string key; uses "normalized" tag e.g. `"#adm1 +code"` |
|
261
|
+
| `:default` | string key; strips hashtags and spaces e.g. `"adm1+code"` |
|
262
|
+
| `:symbol` | symbol key; strips hashtags and spaces and converts plus (`+`) to underscore (`_`) and removes all non-alphanumerics e.g. `:adm1_code` |
|
263
|
+
|
264
|
+
Or add your own converters. Example:
|
265
|
+
|
266
|
+
``` ruby
|
267
|
+
pp CsvHuman.parse( txt, header_converter: ->(h) { h.upcase } )
|
268
|
+
```
|
269
|
+
|
270
|
+
resulting in:
|
271
|
+
|
272
|
+
``` ruby
|
273
|
+
[{"#SECTOR +EN" => "WASH",
|
274
|
+
"#SUBSECTOR" => "Subsector 1",
|
275
|
+
"#ORG" => "Org 1",
|
276
|
+
"#COUNTRY" => "Country 1",
|
277
|
+
"#SEX +TARGETED" => [100, 100],
|
278
|
+
"#ADM1" => "Region 1"},
|
279
|
+
# ...
|
280
|
+
]
|
281
|
+
```
|
282
|
+
|
283
|
+
A custom header converter is a callable (e.g. a lambda or proc) that gets the (normalized) header tag
|
284
|
+
passed in (e.g. `#sector +en`) as a string
|
285
|
+
and returns a string or symbol to use for the hash key in records.
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
### Tag Helpers
|
164
290
|
|
165
291
|
**Normalize**. Use `CsvHuman::Tag.normalize` to pretty print or normalize a tag.
|
166
292
|
All parts get downcased (lowercased), all attributes sorted by a-to-z,
|
167
|
-
all extra or missing hashtags or pluses added or removed
|
293
|
+
all extra or missing hashtags or pluses added or removed,
|
168
294
|
all extra or missing spaces added or removed. Example:
|
169
295
|
|
170
296
|
``` ruby
|
data/lib/csvhuman.rb
CHANGED
@@ -1,25 +1,15 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require '
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
## add some "alternative" shortcut aliases
|
17
|
-
CsvHum = CsvHuman
|
18
|
-
CSV_HXL = CsvHuman
|
19
|
-
CSVHXL = CsvHuman
|
20
|
-
HXL = CsvHuman
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
# say hello
|
25
|
-
puts CsvHuman.banner if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'csvreader' ## add all "alternative" shortcut aliases
|
4
|
+
|
5
|
+
|
6
|
+
## our own code (without "top-level" shortcuts e.g. "modular version")
|
7
|
+
require 'csvhuman/base'
|
8
|
+
|
9
|
+
|
10
|
+
####
|
11
|
+
# add some "alternative" shortcut aliases
|
12
|
+
CsvHum = CsvHuman
|
13
|
+
CSV_HXL = CsvHuman
|
14
|
+
CSVHXL = CsvHuman
|
15
|
+
HXL = CsvHuman
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'csvreader/base'
|
4
|
+
|
5
|
+
|
6
|
+
## our own code
|
7
|
+
require 'csvhuman/version' # note: let version always go first
|
8
|
+
require 'csvhuman/tag'
|
9
|
+
require 'csvhuman/column'
|
10
|
+
require 'csvhuman/converter'
|
11
|
+
require 'csvhuman/reader'
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
# say hello
|
17
|
+
puts CsvHuman.banner if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
|
data/lib/csvhuman/column.rb
CHANGED
@@ -7,20 +7,41 @@ class CsvHuman
|
|
7
7
|
class Columns
|
8
8
|
|
9
9
|
|
10
|
-
def self.build( values )
|
10
|
+
def self.build( values, header_converter )
|
11
11
|
|
12
12
|
## "clean" unify/normalize names
|
13
|
-
|
13
|
+
keys = values.map do |value|
|
14
14
|
if value
|
15
15
|
if value.empty?
|
16
16
|
nil
|
17
17
|
else
|
18
18
|
## e.g. #ADM1 CODE => #adm1 +code
|
19
19
|
## POPULATION F CHILDREN AFFECTED => #population +affected +children +f
|
20
|
-
|
20
|
+
tag_key = Tag.normalize( value )
|
21
21
|
## turn empty normalized tags (e.g. "stray" hashtag) into nil too
|
22
|
-
|
23
|
-
value
|
22
|
+
|
23
|
+
if value.empty?
|
24
|
+
nil
|
25
|
+
else
|
26
|
+
header_key =
|
27
|
+
## todo/fix: pass in column index - why? why not?
|
28
|
+
## pass in column index for all columns (or only tagged ones?) or both?
|
29
|
+
## if header_converter.arity == 1 # straight converter
|
30
|
+
header_converter.call( tag_key )
|
31
|
+
## else
|
32
|
+
## header_converter.call( value, index )
|
33
|
+
## end
|
34
|
+
|
35
|
+
## note:
|
36
|
+
## return nil, "" or false to skip column
|
37
|
+
if header_key.nil? || header_key.empty? || header_key == false ## check again: skip empty "" columns
|
38
|
+
nil
|
39
|
+
else
|
40
|
+
## note: return header_key (used for returned record/hash) AND tag_key (used for type conversion config)
|
41
|
+
## lets us fold more columns into one or splat single list/array columns into many
|
42
|
+
[header_key,tag_key]
|
43
|
+
end
|
44
|
+
end
|
24
45
|
end
|
25
46
|
else # keep (nil) as is
|
26
47
|
nil
|
@@ -29,34 +50,43 @@ class Columns
|
|
29
50
|
|
30
51
|
|
31
52
|
counts = {}
|
32
|
-
|
53
|
+
keys.each_with_index do |key,i|
|
33
54
|
if key
|
34
|
-
|
35
|
-
counts[
|
55
|
+
header_key = key[0]
|
56
|
+
counts[header_key] ||= []
|
57
|
+
counts[header_key] << i
|
36
58
|
end
|
37
59
|
end
|
38
60
|
## puts "counts:"
|
39
61
|
## pp counts
|
40
62
|
|
41
|
-
|
63
|
+
|
64
|
+
## create all unique tags (used for type conversion)
|
42
65
|
tags = {}
|
43
|
-
|
44
|
-
|
66
|
+
keys.each do |key|
|
67
|
+
if key
|
68
|
+
tag_key = key[1]
|
69
|
+
tags[tag_key] ||= Tag.parse( tag_key ) ## note: "reuse" tag for all columns if same tag key
|
70
|
+
end
|
45
71
|
end
|
46
72
|
## puts "tags:"
|
47
73
|
## pp tags
|
48
74
|
|
49
75
|
|
50
76
|
cols = []
|
51
|
-
|
77
|
+
keys.each do |key|
|
52
78
|
if key
|
53
|
-
|
54
|
-
|
79
|
+
header_key = key[0]
|
80
|
+
tag_key = key[1]
|
81
|
+
|
82
|
+
count = counts[header_key]
|
83
|
+
tag = tags[tag_key] ## note: "reuse" tag for all columns if same tag key
|
84
|
+
|
55
85
|
if count.size > 1
|
56
86
|
## note: defaults to use "standard/default" tag key (as a string)
|
57
|
-
cols << Column.new(
|
87
|
+
cols << Column.new( header_key, tag, list: true )
|
58
88
|
else
|
59
|
-
cols << Column.new(
|
89
|
+
cols << Column.new( header_key, tag )
|
60
90
|
end
|
61
91
|
else
|
62
92
|
cols << Column.new
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman
|
5
|
+
|
6
|
+
HEADER_CONVERTERS = {
|
7
|
+
## e.g. "#adm1 +code" => "#adm1 +code"
|
8
|
+
none: ->(value) { value },
|
9
|
+
|
10
|
+
## e.g. "#adm1 +code" => "adm1+code" (strip hashtags and whitespace)
|
11
|
+
default: ->(value) { value.downcase.gsub('#', '').
|
12
|
+
gsub( /\s+/, '' ) },
|
13
|
+
|
14
|
+
## e.g. "#adm1 +code" => :adm1_code (strip hashtags and whitespace and turn plus (+) into underscore (_))
|
15
|
+
symbol: ->(value) { value.downcase.gsub('#', '').
|
16
|
+
gsub( /\s+/, '' ).
|
17
|
+
gsub('+', '_').
|
18
|
+
gsub( /[^\w]+/, '' ).to_sym }
|
19
|
+
}
|
20
|
+
|
21
|
+
end # class CsvHuman
|
data/lib/csvhuman/reader.rb
CHANGED
@@ -9,12 +9,13 @@ class CsvHuman
|
|
9
9
|
## - value (auto-magically) turned into an array / list
|
10
10
|
|
11
11
|
|
12
|
-
|
12
|
+
def self.open( path, mode=nil, sep: nil,
|
13
|
+
header_converter: nil, &block ) ## rename path to filename or name - why? why not?
|
13
14
|
|
14
15
|
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
15
16
|
## f = File.open( path, mode ? mode : 'r:bom|utf-8' )
|
16
|
-
csv =
|
17
|
-
human = new( csv )
|
17
|
+
csv = CsvReader.open( path, mode, sep: sep, parser: CsvReader::Parser.human ) ## note: returns an enumarator-like object
|
18
|
+
human = new( csv, header_converter: header_converter )
|
18
19
|
|
19
20
|
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
20
21
|
if block_given?
|
@@ -26,17 +27,19 @@ class CsvHuman
|
|
26
27
|
else
|
27
28
|
human
|
28
29
|
end
|
29
|
-
|
30
|
+
end # method self.open
|
30
31
|
|
31
32
|
|
32
|
-
def self.read( path
|
33
|
-
|
33
|
+
def self.read( path, sep: nil,
|
34
|
+
header_converter: nil )
|
35
|
+
open( path, sep: sep, header_converter: header_converter ) { |human| human.read }
|
34
36
|
end
|
35
37
|
|
36
38
|
|
37
|
-
def self.foreach( path,
|
38
|
-
|
39
|
-
|
39
|
+
def self.foreach( path, sep: nil,
|
40
|
+
header_converter: nil, &block )
|
41
|
+
csv = CsvReader.open( path, sep: sep, parser: CsvReader::Parser.human ) ## note: returns an enumarator-like object
|
42
|
+
human = new( csv, header_converter: header_converter )
|
40
43
|
|
41
44
|
if block_given?
|
42
45
|
begin
|
@@ -53,8 +56,10 @@ class CsvHuman
|
|
53
56
|
end # method self.foreach
|
54
57
|
|
55
58
|
|
56
|
-
def self.parse( str_or_readable,
|
57
|
-
|
59
|
+
def self.parse( str_or_readable, sep: nil,
|
60
|
+
header_converter: nil, &block )
|
61
|
+
human = new( str_or_readable, sep: sep,
|
62
|
+
header_converter: header_converter )
|
58
63
|
|
59
64
|
if block_given?
|
60
65
|
human.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
@@ -67,10 +72,11 @@ class CsvHuman
|
|
67
72
|
|
68
73
|
attr_reader :header, :tags
|
69
74
|
|
70
|
-
def initialize( recs_or_stream
|
75
|
+
def initialize( recs_or_stream, sep: nil,
|
76
|
+
header_converter: nil )
|
71
77
|
## todo/check: check if arg is a stream/enumerator - why? why not??
|
72
78
|
if recs_or_stream.is_a?( String )
|
73
|
-
@recs =
|
79
|
+
@recs = CsvReader.new( recs_or_stream, sep: sep, parser: CsvReader::Parser.human ) ## note: returns an enumarator-like object
|
74
80
|
else
|
75
81
|
@recs = recs_or_stream
|
76
82
|
end
|
@@ -78,6 +84,15 @@ def initialize( recs_or_stream )
|
|
78
84
|
@header = []
|
79
85
|
@tags = nil ## add tags = nil -- why? why not?
|
80
86
|
@cols = nil ## column mappings (used "internally")
|
87
|
+
|
88
|
+
if header_converter.nil?
|
89
|
+
@header_converter = HEADER_CONVERTERS[:default]
|
90
|
+
elsif header_converter.is_a?( Symbol )
|
91
|
+
## todo/fix: check if converter is nil (not found) - raise exception!!!!
|
92
|
+
@header_converter = HEADER_CONVERTERS[header_converter]
|
93
|
+
else ## assume proc; todo/fix: check it's a proc!!!
|
94
|
+
@header_converter = header_converter
|
95
|
+
end
|
81
96
|
end
|
82
97
|
|
83
98
|
|
@@ -92,7 +107,7 @@ def each( &block )
|
|
92
107
|
## pp values
|
93
108
|
if @cols.nil?
|
94
109
|
if values.any? { |value| value && value.strip.start_with?('#') }
|
95
|
-
@cols = Columns.build( values )
|
110
|
+
@cols = Columns.build( values, @header_converter )
|
96
111
|
@tags = values
|
97
112
|
else
|
98
113
|
@header << values
|
@@ -105,7 +120,7 @@ def each( &block )
|
|
105
120
|
@cols.each_with_index do |col,i|
|
106
121
|
if col.tagged?
|
107
122
|
key = col.key
|
108
|
-
value =
|
123
|
+
value = col.tag.typecast( values[i] )
|
109
124
|
if col.list?
|
110
125
|
record[ key ] ||= []
|
111
126
|
record[ key ] << value
|
data/lib/csvhuman/tag.rb
CHANGED
@@ -61,7 +61,7 @@ class Tag
|
|
61
61
|
|
62
62
|
if name == 'date'
|
63
63
|
Date
|
64
|
-
elsif ['affected', 'inneed'].include?( name )
|
64
|
+
elsif ['affected', 'inneed', 'targeted', 'reached'].include?( name )
|
65
65
|
Integer
|
66
66
|
else
|
67
67
|
## check attributes
|
@@ -71,7 +71,18 @@ class Tag
|
|
71
71
|
Integer
|
72
72
|
elsif attributes.include?( 'date' ) ### todo/check: exists +date?
|
73
73
|
Date
|
74
|
-
elsif attributes.include?( '
|
74
|
+
elsif attributes.include?( 'killed' ) ||
|
75
|
+
attributes.include?( 'injured' ) ||
|
76
|
+
attributes.include?( 'infected' ) ||
|
77
|
+
attributes.include?( 'displaced' ) ||
|
78
|
+
attributes.include?( 'idps' ) ||
|
79
|
+
attributes.include?( 'refugees' ) ||
|
80
|
+
attributes.include?( 'abducted' ) ||
|
81
|
+
attributes.include?( 'threatened' ) ||
|
82
|
+
attributes.include?( 'affected' ) ||
|
83
|
+
attributes.include?( 'inneed' ) ||
|
84
|
+
attributes.include?( 'targeted' ) ||
|
85
|
+
attributes.include?( 'reached' )
|
75
86
|
Integer
|
76
87
|
else
|
77
88
|
String ## assume (default to) string
|
data/lib/csvhuman/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_header_converter.rb
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestHeaderConverter < MiniTest::Test
|
12
|
+
|
13
|
+
def conv_none( value )
|
14
|
+
CsvHuman::HEADER_CONVERTERS[:none].call( value )
|
15
|
+
end
|
16
|
+
|
17
|
+
def conv_default( value )
|
18
|
+
CsvHuman::HEADER_CONVERTERS[:default].call( value )
|
19
|
+
end
|
20
|
+
|
21
|
+
def conv_symbol( value )
|
22
|
+
CsvHuman::HEADER_CONVERTERS[:symbol].call( value )
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
def test_none
|
28
|
+
assert_equal "#sector", conv_none( "#sector" )
|
29
|
+
assert_equal "#adm1", conv_none( "#adm1" )
|
30
|
+
|
31
|
+
assert_equal "#sector +en", conv_none( "#sector +en" )
|
32
|
+
assert_equal "#adm1 +code", conv_none( "#adm1 +code" )
|
33
|
+
|
34
|
+
assert_equal "#affected +children +f", conv_none( "#affected +children +f" )
|
35
|
+
assert_equal "#population +affected +children +m", conv_none( "#population +affected +children +m" )
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
def test_default
|
40
|
+
assert_equal "sector", conv_default( "#sector" )
|
41
|
+
assert_equal "adm1", conv_default( "#adm1" )
|
42
|
+
|
43
|
+
assert_equal "sector+en", conv_default( "#sector +en" )
|
44
|
+
assert_equal "adm1+code", conv_default( "#adm1 +code" )
|
45
|
+
|
46
|
+
assert_equal "affected+children+f", conv_default( "#affected +children +f" )
|
47
|
+
assert_equal "population+affected+children+m", conv_default( "#population +affected +children +m" )
|
48
|
+
end
|
49
|
+
|
50
|
+
|
51
|
+
def test_symbol
|
52
|
+
assert_equal :sector, conv_symbol( "#sector" )
|
53
|
+
assert_equal :adm1, conv_symbol( "#adm1" )
|
54
|
+
|
55
|
+
assert_equal :sector_en, conv_symbol( "#sector +en" )
|
56
|
+
assert_equal :adm1_code, conv_symbol( "#adm1 +code" )
|
57
|
+
|
58
|
+
assert_equal :affected_children_f, conv_symbol( "#affected +children +f" )
|
59
|
+
assert_equal :population_affected_children_m, conv_symbol( "#population +affected +children +m" )
|
60
|
+
end
|
61
|
+
|
62
|
+
|
63
|
+
end # class TestHeaderConverter
|
data/test/test_reader.rb
CHANGED
@@ -50,26 +50,99 @@ def txt
|
|
50
50
|
TXT
|
51
51
|
end
|
52
52
|
|
53
|
+
def txt2
|
54
|
+
<<TXT
|
55
|
+
%%%%%%%
|
56
|
+
% some comments here
|
57
|
+
% note: you can use blank lines and/or leading and trailing spaces
|
58
|
+
|
59
|
+
What, , , Who ,Where ,For whom,
|
60
|
+
Record, Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
61
|
+
|
62
|
+
, #sector+en, #subsector, #org, #country, #sex+#targeted, #sex+#targeted, #adm1
|
63
|
+
|
64
|
+
%%%
|
65
|
+
% more comments here
|
66
|
+
|
67
|
+
001, WASH, Subsector 1, Org 1, Country 1, 100, 100, Region 1
|
68
|
+
002, Health, Subsector 2, Org 2, Country 2, , , Region 2
|
69
|
+
003, Education, Subsector 3, Org 3, Country 2, 250, 300, Region 3
|
70
|
+
004, WASH, Subsector 4, Org 1, Country 3, 80, 95, Region 4
|
71
|
+
|
72
|
+
%%%
|
73
|
+
% some more comments and blank lines
|
74
|
+
|
75
|
+
|
76
|
+
TXT
|
77
|
+
end
|
78
|
+
|
79
|
+
def txt3
|
80
|
+
<<TXT
|
81
|
+
%%%%%%%%%%%%%%%%%
|
82
|
+
% use semicolon (;) as sep(arator)
|
83
|
+
|
84
|
+
What;;;Who;Where;For whom;
|
85
|
+
Record;Sector/Cluster;Subsector;Organisation;Country;Males;Females;Subregion
|
86
|
+
;#sector+en;#subsector;#org;#country;#sex+#targeted;#sex+#targeted;#adm1
|
87
|
+
001;WASH;Subsector 1;Org 1;Country 1;100;100;Region 1
|
88
|
+
002;Health;Subsector 2;Org 2;Country 2;;;Region 2
|
89
|
+
003;Education;Subsector 3;Org 3;Country 2;250;300;Region 3
|
90
|
+
004;WASH;Subsector 4;Org 1;Country 3;80;95;Region 4
|
91
|
+
TXT
|
92
|
+
end
|
93
|
+
|
94
|
+
|
53
95
|
|
54
|
-
def
|
96
|
+
def expected_recs2
|
97
|
+
[
|
98
|
+
{"sector+en" => "WASH",
|
99
|
+
"subsector" => "Subsector 1",
|
100
|
+
"org" => "Org 1",
|
101
|
+
"country" => "Country 1",
|
102
|
+
"sex+targeted" => [100, 100],
|
103
|
+
"adm1" => "Region 1"},
|
104
|
+
{"sector+en" => "Health",
|
105
|
+
"subsector" => "Subsector 2",
|
106
|
+
"org" => "Org 2",
|
107
|
+
"country" => "Country 2",
|
108
|
+
"sex+targeted" => [nil, nil],
|
109
|
+
"adm1" => "Region 2"},
|
110
|
+
{"sector+en" => "Education",
|
111
|
+
"subsector" => "Subsector 3",
|
112
|
+
"org" => "Org 3",
|
113
|
+
"country" => "Country 2",
|
114
|
+
"sex+targeted" => [250, 300],
|
115
|
+
"adm1" => "Region 3"},
|
116
|
+
{"sector+en" => "WASH",
|
117
|
+
"subsector" => "Subsector 4",
|
118
|
+
"org" => "Org 1",
|
119
|
+
"country" => "Country 3",
|
120
|
+
"sex+targeted" => [80, 95],
|
121
|
+
"adm1" => "Region 4"}]
|
122
|
+
end
|
123
|
+
|
124
|
+
|
125
|
+
def test_basics
|
55
126
|
csv = CsvHuman.new( recs )
|
56
127
|
csv.each do |rec|
|
57
128
|
pp rec
|
58
129
|
end
|
59
130
|
|
60
|
-
pp csv.read
|
61
|
-
|
62
131
|
assert_equal expected_recs, CsvHuman.parse( recs )
|
63
132
|
assert_equal expected_recs, CsvHuman.parse( recs2 )
|
64
133
|
|
65
|
-
|
66
134
|
CsvHuman.parse( recs ).each do |rec|
|
67
135
|
pp rec
|
68
136
|
end
|
69
137
|
|
70
138
|
|
71
139
|
pp CsvHuman.read( "#{CsvHuman.test_data_dir}/test.csv" )
|
72
|
-
|
140
|
+
|
141
|
+
|
142
|
+
assert_equal expected_recs2, CsvHuman.parse( txt )
|
143
|
+
assert_equal expected_recs2, CsvHuman.parse( txt2 )
|
144
|
+
|
145
|
+
|
73
146
|
CsvHuman.parse( txt ).each do |rec|
|
74
147
|
pp rec
|
75
148
|
end
|
@@ -79,4 +152,19 @@ def test_readme
|
|
79
152
|
end
|
80
153
|
end
|
81
154
|
|
155
|
+
|
156
|
+
def test_header_converter
|
157
|
+
pp CsvHuman.parse( txt2, :header_converter => :default )
|
158
|
+
pp CsvHuman.parse( txt2, :header_converter => :none )
|
159
|
+
pp CsvHuman.parse( txt2, :header_converter => :symbol )
|
160
|
+
|
161
|
+
pp CsvHuman.parse( txt2, header_converter: ->(value) { value.upcase } )
|
162
|
+
end
|
163
|
+
|
164
|
+
|
165
|
+
def test_semicolon
|
166
|
+
assert_equal expected_recs2, CsvHuman.parse( txt3, sep: ';' ) ## try with semicolon (;)
|
167
|
+
end
|
168
|
+
|
169
|
+
|
82
170
|
end # class TestReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvhuman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csvreader
|
@@ -68,12 +68,15 @@ files:
|
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
70
|
- lib/csvhuman.rb
|
71
|
+
- lib/csvhuman/base.rb
|
71
72
|
- lib/csvhuman/column.rb
|
73
|
+
- lib/csvhuman/converter.rb
|
72
74
|
- lib/csvhuman/reader.rb
|
73
75
|
- lib/csvhuman/tag.rb
|
74
76
|
- lib/csvhuman/version.rb
|
75
77
|
- test/data/test.csv
|
76
78
|
- test/helper.rb
|
79
|
+
- test/test_header_converter.rb
|
77
80
|
- test/test_reader.rb
|
78
81
|
- test/test_tags.rb
|
79
82
|
homepage: https://github.com/csvreader/csvhuman
|