csvhuman 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +3 -0
- data/README.md +137 -11
- data/lib/csvhuman.rb +15 -25
- data/lib/csvhuman/base.rb +17 -0
- data/lib/csvhuman/column.rb +46 -16
- data/lib/csvhuman/converter.rb +21 -0
- data/lib/csvhuman/reader.rb +30 -15
- data/lib/csvhuman/tag.rb +13 -2
- data/lib/csvhuman/version.rb +2 -2
- data/test/helper.rb +1 -0
- data/test/test_header_converter.rb +63 -0
- data/test/test_reader.rb +93 -5
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34cc6d7eb412bf02188cde5b26201cc1e5c8a46c
|
4
|
+
data.tar.gz: 4c04e8bc8678b19923a42f05d11114389ff78ab5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 404dc263217830a3dfc68e15114bd3e45a406774638574106708fed616a1fa9245becd6cab9a54889ef8335bffc0be5ca9324bb15779510fd4a5eea365156ecd
|
7
|
+
data.tar.gz: 8689b1d38c6edade697583786e595332c5a496285c778967ad8a35557290d0b08a62056b75fc194d995f571220e4836eadcbf6d5f9083ec92cc5d97b3877d9cd
|
data/Manifest.txt
CHANGED
@@ -3,11 +3,14 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/csvhuman.rb
|
6
|
+
lib/csvhuman/base.rb
|
6
7
|
lib/csvhuman/column.rb
|
8
|
+
lib/csvhuman/converter.rb
|
7
9
|
lib/csvhuman/reader.rb
|
8
10
|
lib/csvhuman/tag.rb
|
9
11
|
lib/csvhuman/version.rb
|
10
12
|
test/data/test.csv
|
11
13
|
test/helper.rb
|
14
|
+
test/test_header_converter.rb
|
12
15
|
test/test_reader.rb
|
13
16
|
test/test_tags.rb
|
data/README.md
CHANGED
@@ -79,30 +79,80 @@ resulting in:
|
|
79
79
|
"subsector" => "Subsector 1",
|
80
80
|
"org" => "Org 1",
|
81
81
|
"country" => "Country 1",
|
82
|
-
"sex+targeted" => [
|
82
|
+
"sex+targeted" => [100, 100],
|
83
83
|
"adm1" => "Region 1"},
|
84
84
|
{"sector+en" => "Health",
|
85
85
|
"subsector" => "Subsector 2",
|
86
86
|
"org" => "Org 2",
|
87
87
|
"country" => "Country 2",
|
88
|
-
"sex+targeted" => [
|
88
|
+
"sex+targeted" => [nil, nil],
|
89
89
|
"adm1" => "Region 2"},
|
90
90
|
{"sector+en" => "Education",
|
91
91
|
"subsector" => "Subsector 3",
|
92
92
|
"org" => "Org 3",
|
93
93
|
"country" => "Country 2",
|
94
|
-
"sex+targeted" => [
|
94
|
+
"sex+targeted" => [250, 300],
|
95
95
|
"adm1" => "Region 3"},
|
96
96
|
{"sector+en" => "WASH",
|
97
97
|
"subsector" => "Subsector 4",
|
98
98
|
"org" => "Org 1",
|
99
99
|
"country" => "Country 3",
|
100
|
-
"sex+targeted" => [
|
100
|
+
"sex+targeted" => [80, 95],
|
101
101
|
"adm1" => "Region 4"}]
|
102
102
|
```
|
103
103
|
|
104
104
|
|
105
|
-
|
105
|
+
### What about Enumerable?
|
106
|
+
|
107
|
+
Yes, every reader includes `Enumerable` and runs on `each`.
|
108
|
+
Use `new` or `open` without a block
|
109
|
+
to get the enumerator (iterator).
|
110
|
+
Example:
|
111
|
+
|
112
|
+
|
113
|
+
``` ruby
|
114
|
+
csv = CsvHuman.new( <<TXT ) ## or use HXL.new
|
115
|
+
What,,,Who,Where,For whom,
|
116
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
117
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
118
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
119
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
120
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
121
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
122
|
+
TXT
|
123
|
+
it = csv.to_enum
|
124
|
+
pp it.next
|
125
|
+
# => {"sector+en" => "WASH",
|
126
|
+
# "subsector" => "Subsector 1",
|
127
|
+
# "org" => "Org 1",
|
128
|
+
# "country" => "Country 1",
|
129
|
+
# "sex+targeted" => [100, 100],
|
130
|
+
# "adm1" => "Region 1"}
|
131
|
+
|
132
|
+
|
133
|
+
# -or-
|
134
|
+
|
135
|
+
csv = CsvHuman.open( "./test.csv" ) # or use HXL.open
|
136
|
+
it = csv.to_enum
|
137
|
+
pp it.next
|
138
|
+
# => {"sector+en" => "WASH",
|
139
|
+
# "subsector" => "Subsector 1",
|
140
|
+
# "org" => "Org 1",
|
141
|
+
# "country" => "Country 1",
|
142
|
+
# "sex+targeted" => [100, 100],
|
143
|
+
# "adm1" => "Region 1"}
|
144
|
+
pp it.next
|
145
|
+
# => {"sector+en" => "Health",
|
146
|
+
# "subsector" => "Subsector 2",
|
147
|
+
# "org" => "Org 2",
|
148
|
+
# "country" => "Country 2",
|
149
|
+
# "sex+targeted" => [nil, nil],
|
150
|
+
# "adm1" => "Region 2"}
|
151
|
+
```
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
### More Ways to Use
|
106
156
|
|
107
157
|
``` ruby
|
108
158
|
csv = CsvHuman.new( recs )
|
@@ -110,8 +160,6 @@ csv.each do |rec|
|
|
110
160
|
pp rec
|
111
161
|
end
|
112
162
|
|
113
|
-
pp csv.read
|
114
|
-
|
115
163
|
|
116
164
|
CsvHuman.parse( recs ).each do |rec|
|
117
165
|
pp rec
|
@@ -136,8 +184,6 @@ hxl.each do |rec|
|
|
136
184
|
pp rec
|
137
185
|
end
|
138
186
|
|
139
|
-
pp hxl.read
|
140
|
-
|
141
187
|
|
142
188
|
HXL.parse( recs ).each do |rec|
|
143
189
|
pp rec
|
@@ -160,11 +206,91 @@ Note: More aliases for `CsvHuman`, `HXL`? Yes, you can use
|
|
160
206
|
|
161
207
|
|
162
208
|
|
163
|
-
|
209
|
+
### What about symbol keys for hashes?
|
210
|
+
|
211
|
+
Yes, you can use the `header_converter` keyword option.
|
212
|
+
Use `:symbol` for (auto-)converting header tags (strings) to symbols.
|
213
|
+
Note: the symbol converter will remove all hashtags (`#`) and spaces and
|
214
|
+
will change the plus (`+`) to underscore (`_`)
|
215
|
+
and remove all non-alphanumeric (e.g. `!?$%`) chars.
|
216
|
+
|
217
|
+
Example:
|
218
|
+
|
219
|
+
``` ruby
|
220
|
+
txt =<<TXT
|
221
|
+
What,,,Who,Where,For whom,
|
222
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
223
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
224
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
225
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
226
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
227
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
228
|
+
TXT
|
229
|
+
|
230
|
+
pp CsvHuman.parse( txt, :header_converter => :symbol ) ## or use HXL.parse
|
231
|
+
|
232
|
+
# -or-
|
233
|
+
|
234
|
+
options = { :header_converter => :symbol }
|
235
|
+
pp CsvHuman.parse( txt, options )
|
236
|
+
```
|
237
|
+
|
238
|
+
resulting in:
|
239
|
+
|
240
|
+
``` ruby
|
241
|
+
[{:sector_en => "WASH",
|
242
|
+
:subsector => "Subsector 1",
|
243
|
+
:org => "Org 1",
|
244
|
+
:country => "Country 1",
|
245
|
+
:sex_targeted => [100, 100],
|
246
|
+
:adm1 => "Region 1"},
|
247
|
+
# ...
|
248
|
+
{:sector_en => "WASH",
|
249
|
+
:subsector => "Subsector 4",
|
250
|
+
:org => "Org 1",
|
251
|
+
:country => "Country 3",
|
252
|
+
:sex_targeted => [80, 95],
|
253
|
+
:adm1 => "Region 4"}]
|
254
|
+
```
|
255
|
+
|
256
|
+
Built-in header converters include:
|
257
|
+
|
258
|
+
| Converter | Comments |
|
259
|
+
|--------------|---------------------|
|
260
|
+
| `:none` | string key; uses "normalized" tag e.g. `"#adm1 +code"` |
|
261
|
+
| `:default`   | string key; strips hashtags and spaces e.g. `"adm1+code"`  |
|
262
|
+
| `:symbol`    | symbol key; strips hashtags and spaces and converts plus (`+`) to underscore (`_`) and removes all non-alphanumerics e.g. `:adm1_code` |
|
263
|
+
|
264
|
+
Or add your own converters. Example:
|
265
|
+
|
266
|
+
``` ruby
|
267
|
+
pp CsvHuman.parse( txt, header_converter: ->(h) { h.upcase } )
|
268
|
+
```
|
269
|
+
|
270
|
+
resulting in:
|
271
|
+
|
272
|
+
``` ruby
|
273
|
+
[{"#SECTOR +EN" => "WASH",
|
274
|
+
"#SUBSECTOR" => "Subsector 1",
|
275
|
+
"#ORG" => "Org 1",
|
276
|
+
"#COUNTRY" => "Country 1",
|
277
|
+
"#SEX +TARGETED" => [100, 100],
|
278
|
+
"#ADM1" => "Region 1"},
|
279
|
+
# ...
|
280
|
+
]
|
281
|
+
```
|
282
|
+
|
283
|
+
A custom header converter is a callable (e.g. a lambda or proc) that gets the (normalized) header tag
|
284
|
+
passed in (e.g. `#sector +en`) as a string
|
285
|
+
and returns a string or symbol to use for the hash key in records.
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
### Tag Helpers
|
164
290
|
|
165
291
|
**Normalize**. Use `CsvHuman::Tag.normalize` to pretty print or normalize a tag.
|
166
292
|
All parts get downcased (lowercased), all attributes sorted by a-to-z,
|
167
|
-
all extra or missing hashtags or pluses added or removed
|
293
|
+
all extra or missing hashtags or pluses added or removed,
|
168
294
|
all extra or missing spaces added or removed. Example:
|
169
295
|
|
170
296
|
``` ruby
|
data/lib/csvhuman.rb
CHANGED
@@ -1,25 +1,15 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require '
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
## add some "alternative" shortcut aliases
|
17
|
-
CsvHum = CsvHuman
|
18
|
-
CSV_HXL = CsvHuman
|
19
|
-
CSVHXL = CsvHuman
|
20
|
-
HXL = CsvHuman
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
# say hello
|
25
|
-
puts CsvHuman.banner if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'csvreader' ## add all "alternative" shortcut aliases
|
4
|
+
|
5
|
+
|
6
|
+
## our own code (without "top-level" shortcuts e.g. "modular version")
|
7
|
+
require 'csvhuman/base'
|
8
|
+
|
9
|
+
|
10
|
+
####
|
11
|
+
# add some "alternative" shortcut aliases
|
12
|
+
CsvHum = CsvHuman
|
13
|
+
CSV_HXL = CsvHuman
|
14
|
+
CSVHXL = CsvHuman
|
15
|
+
HXL = CsvHuman
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'csvreader/base'
|
4
|
+
|
5
|
+
|
6
|
+
## our own code
|
7
|
+
require 'csvhuman/version' # note: let version always go first
|
8
|
+
require 'csvhuman/tag'
|
9
|
+
require 'csvhuman/column'
|
10
|
+
require 'csvhuman/converter'
|
11
|
+
require 'csvhuman/reader'
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
# say hello
|
17
|
+
puts CsvHuman.banner if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
|
data/lib/csvhuman/column.rb
CHANGED
@@ -7,20 +7,41 @@ class CsvHuman
|
|
7
7
|
class Columns
|
8
8
|
|
9
9
|
|
10
|
-
def self.build( values )
|
10
|
+
def self.build( values, header_converter )
|
11
11
|
|
12
12
|
## "clean" unify/normalize names
|
13
|
-
|
13
|
+
keys = values.map do |value|
|
14
14
|
if value
|
15
15
|
if value.empty?
|
16
16
|
nil
|
17
17
|
else
|
18
18
|
## e.g. #ADM1 CODE => #adm1 +code
|
19
19
|
## POPULATION F CHILDREN AFFECTED => #population +affected +children +f
|
20
|
-
|
20
|
+
tag_key = Tag.normalize( value )
|
21
21
|
## turn empty normalized tags (e.g. "stray" hashtag) into nil too
|
22
|
-
|
23
|
-
value
|
22
|
+
|
23
|
+
if value.empty?
|
24
|
+
nil
|
25
|
+
else
|
26
|
+
header_key =
|
27
|
+
## todo/fix: pass in column index - why? why not?
|
28
|
+
## pass in column index for all columns (or only tagged ones?) or both?
|
29
|
+
## if header_converter.arity == 1 # straight converter
|
30
|
+
header_converter.call( tag_key )
|
31
|
+
## else
|
32
|
+
## header_converter.call( value, index )
|
33
|
+
## end
|
34
|
+
|
35
|
+
## note:
|
36
|
+
## return nil, "" or false to skip column
|
37
|
+
if header_key.nil? || header_key.empty? || header_key == false ## check again: skip empty "" columns
|
38
|
+
nil
|
39
|
+
else
|
40
|
+
## note: return header_key (used for returned record/hash) AND tag_key (used for type conversion config)
|
41
|
+
## lets us fold more columns into one or splat single list/array columns into many
|
42
|
+
[header_key,tag_key]
|
43
|
+
end
|
44
|
+
end
|
24
45
|
end
|
25
46
|
else # keep (nil) as is
|
26
47
|
nil
|
@@ -29,34 +50,43 @@ class Columns
|
|
29
50
|
|
30
51
|
|
31
52
|
counts = {}
|
32
|
-
|
53
|
+
keys.each_with_index do |key,i|
|
33
54
|
if key
|
34
|
-
|
35
|
-
counts[
|
55
|
+
header_key = key[0]
|
56
|
+
counts[header_key] ||= []
|
57
|
+
counts[header_key] << i
|
36
58
|
end
|
37
59
|
end
|
38
60
|
## puts "counts:"
|
39
61
|
## pp counts
|
40
62
|
|
41
|
-
|
63
|
+
|
64
|
+
## create all unique tags (used for type conversion)
|
42
65
|
tags = {}
|
43
|
-
|
44
|
-
|
66
|
+
keys.each do |key|
|
67
|
+
if key
|
68
|
+
tag_key = key[1]
|
69
|
+
tags[tag_key] ||= Tag.parse( tag_key ) ## note: "reuse" tag for all columns if same tag key
|
70
|
+
end
|
45
71
|
end
|
46
72
|
## puts "tags:"
|
47
73
|
## pp tags
|
48
74
|
|
49
75
|
|
50
76
|
cols = []
|
51
|
-
|
77
|
+
keys.each do |key|
|
52
78
|
if key
|
53
|
-
|
54
|
-
|
79
|
+
header_key = key[0]
|
80
|
+
tag_key = key[1]
|
81
|
+
|
82
|
+
count = counts[header_key]
|
83
|
+
tag = tags[tag_key] ## note: "reuse" tag for all columns if same tag key
|
84
|
+
|
55
85
|
if count.size > 1
|
56
86
|
## note: defaults to use "standard/default" tag key (as a string)
|
57
|
-
cols << Column.new(
|
87
|
+
cols << Column.new( header_key, tag, list: true )
|
58
88
|
else
|
59
|
-
cols << Column.new(
|
89
|
+
cols << Column.new( header_key, tag )
|
60
90
|
end
|
61
91
|
else
|
62
92
|
cols << Column.new
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman
|
5
|
+
|
6
|
+
HEADER_CONVERTERS = {
|
7
|
+
## e.g. "#adm1 +code" => "#adm1 +code"
|
8
|
+
none: ->(value) { value },
|
9
|
+
|
10
|
+
## e.g. "#adm1 +code" => "adm1+code" (strip hashtags and whitespace)
|
11
|
+
default: ->(value) { value.downcase.gsub('#', '').
|
12
|
+
gsub( /\s+/, '' ) },
|
13
|
+
|
14
|
+
## e.g. "#adm1 +code" => :adm1_code (strip hashtags and whitespace and turn plus (+) into underscore (_))
|
15
|
+
symbol: ->(value) { value.downcase.gsub('#', '').
|
16
|
+
gsub( /\s+/, '' ).
|
17
|
+
gsub('+', '_').
|
18
|
+
gsub( /[^\w]+/, '' ).to_sym }
|
19
|
+
}
|
20
|
+
|
21
|
+
end # class CsvHuman
|
data/lib/csvhuman/reader.rb
CHANGED
@@ -9,12 +9,13 @@ class CsvHuman
|
|
9
9
|
## - value (auto-magically) turned into an array / list
|
10
10
|
|
11
11
|
|
12
|
-
|
12
|
+
def self.open( path, mode=nil, sep: nil,
|
13
|
+
header_converter: nil, &block ) ## rename path to filename or name - why? why not?
|
13
14
|
|
14
15
|
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
15
16
|
## f = File.open( path, mode ? mode : 'r:bom|utf-8' )
|
16
|
-
csv =
|
17
|
-
human = new( csv )
|
17
|
+
csv = CsvReader.open( path, mode, sep: sep, parser: CsvReader::Parser.human ) ## note: returns an enumerator-like object
|
18
|
+
human = new( csv, header_converter: header_converter )
|
18
19
|
|
19
20
|
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
20
21
|
if block_given?
|
@@ -26,17 +27,19 @@ class CsvHuman
|
|
26
27
|
else
|
27
28
|
human
|
28
29
|
end
|
29
|
-
|
30
|
+
end # method self.open
|
30
31
|
|
31
32
|
|
32
|
-
def self.read( path
|
33
|
-
|
33
|
+
def self.read( path, sep: nil,
|
34
|
+
header_converter: nil )
|
35
|
+
open( path, sep: sep, header_converter: header_converter ) { |human| human.read }
|
34
36
|
end
|
35
37
|
|
36
38
|
|
37
|
-
def self.foreach( path,
|
38
|
-
|
39
|
-
|
39
|
+
def self.foreach( path, sep: nil,
|
40
|
+
header_converter: nil, &block )
|
41
|
+
csv = CsvReader.open( path, sep: sep, parser: CsvReader::Parser.human ) ## note: returns an enumerator-like object
|
42
|
+
human = new( csv, header_converter: header_converter )
|
40
43
|
|
41
44
|
if block_given?
|
42
45
|
begin
|
@@ -53,8 +56,10 @@ class CsvHuman
|
|
53
56
|
end # method self.foreach
|
54
57
|
|
55
58
|
|
56
|
-
def self.parse( str_or_readable,
|
57
|
-
|
59
|
+
def self.parse( str_or_readable, sep: nil,
|
60
|
+
header_converter: nil, &block )
|
61
|
+
human = new( str_or_readable, sep: sep,
|
62
|
+
header_converter: header_converter )
|
58
63
|
|
59
64
|
if block_given?
|
60
65
|
human.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
@@ -67,10 +72,11 @@ class CsvHuman
|
|
67
72
|
|
68
73
|
attr_reader :header, :tags
|
69
74
|
|
70
|
-
def initialize( recs_or_stream
|
75
|
+
def initialize( recs_or_stream, sep: nil,
|
76
|
+
header_converter: nil )
|
71
77
|
## todo/check: check if arg is a stream/enumerator - why? why not??
|
72
78
|
if recs_or_stream.is_a?( String )
|
73
|
-
@recs =
|
79
|
+
@recs = CsvReader.new( recs_or_stream, sep: sep, parser: CsvReader::Parser.human ) ## note: returns an enumerator-like object
|
74
80
|
else
|
75
81
|
@recs = recs_or_stream
|
76
82
|
end
|
@@ -78,6 +84,15 @@ def initialize( recs_or_stream )
|
|
78
84
|
@header = []
|
79
85
|
@tags = nil ## add tags = nil -- why? why not?
|
80
86
|
@cols = nil ## column mappings (used "internally")
|
87
|
+
|
88
|
+
if header_converter.nil?
|
89
|
+
@header_converter = HEADER_CONVERTERS[:default]
|
90
|
+
elsif header_converter.is_a?( Symbol )
|
91
|
+
## todo/fix: check if converter is nil (not found) - raise except!!!!
|
92
|
+
@header_converter = HEADER_CONVERTERS[header_converter]
|
93
|
+
else ## assume proc; todo/fix: check it's a proc!!!
|
94
|
+
@header_converter = header_converter
|
95
|
+
end
|
81
96
|
end
|
82
97
|
|
83
98
|
|
@@ -92,7 +107,7 @@ def each( &block )
|
|
92
107
|
## pp values
|
93
108
|
if @cols.nil?
|
94
109
|
if values.any? { |value| value && value.strip.start_with?('#') }
|
95
|
-
@cols = Columns.build( values )
|
110
|
+
@cols = Columns.build( values, @header_converter )
|
96
111
|
@tags = values
|
97
112
|
else
|
98
113
|
@header << values
|
@@ -105,7 +120,7 @@ def each( &block )
|
|
105
120
|
@cols.each_with_index do |col,i|
|
106
121
|
if col.tagged?
|
107
122
|
key = col.key
|
108
|
-
value =
|
123
|
+
value = col.tag.typecast( values[i] )
|
109
124
|
if col.list?
|
110
125
|
record[ key ] ||= []
|
111
126
|
record[ key ] << value
|
data/lib/csvhuman/tag.rb
CHANGED
@@ -61,7 +61,7 @@ class Tag
|
|
61
61
|
|
62
62
|
if name == 'date'
|
63
63
|
Date
|
64
|
-
elsif ['affected', 'inneed'].include?( name )
|
64
|
+
elsif ['affected', 'inneed', 'targeted', 'reached'].include?( name )
|
65
65
|
Integer
|
66
66
|
else
|
67
67
|
## check attributes
|
@@ -71,7 +71,18 @@ class Tag
|
|
71
71
|
Integer
|
72
72
|
elsif attributes.include?( 'date' ) ### todo/check: exists +date?
|
73
73
|
Date
|
74
|
-
elsif attributes.include?( '
|
74
|
+
elsif attributes.include?( 'killed' ) ||
|
75
|
+
attributes.include?( 'injured' ) ||
|
76
|
+
attributes.include?( 'infected' ) ||
|
77
|
+
attributes.include?( 'displaced' ) ||
|
78
|
+
attributes.include?( 'idps' ) ||
|
79
|
+
attributes.include?( 'refugees' ) ||
|
80
|
+
attributes.include?( 'abducted' ) ||
|
81
|
+
attributes.include?( 'threatened' ) ||
|
82
|
+
attributes.include?( 'affected' ) ||
|
83
|
+
attributes.include?( 'inneed' ) ||
|
84
|
+
attributes.include?( 'targeted' ) ||
|
85
|
+
attributes.include?( 'reached' )
|
75
86
|
Integer
|
76
87
|
else
|
77
88
|
String ## assume (default to) string
|
data/lib/csvhuman/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_header_converter.rb
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestHeaderConverter < MiniTest::Test
|
12
|
+
|
13
|
+
def conv_none( value )
|
14
|
+
CsvHuman::HEADER_CONVERTERS[:none].call( value )
|
15
|
+
end
|
16
|
+
|
17
|
+
def conv_default( value )
|
18
|
+
CsvHuman::HEADER_CONVERTERS[:default].call( value )
|
19
|
+
end
|
20
|
+
|
21
|
+
def conv_symbol( value )
|
22
|
+
CsvHuman::HEADER_CONVERTERS[:symbol].call( value )
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
def test_none
|
28
|
+
assert_equal "#sector", conv_none( "#sector" )
|
29
|
+
assert_equal "#adm1", conv_none( "#adm1" )
|
30
|
+
|
31
|
+
assert_equal "#sector +en", conv_none( "#sector +en" )
|
32
|
+
assert_equal "#adm1 +code", conv_none( "#adm1 +code" )
|
33
|
+
|
34
|
+
assert_equal "#affected +children +f", conv_none( "#affected +children +f" )
|
35
|
+
assert_equal "#population +affected +children +m", conv_none( "#population +affected +children +m" )
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
def test_default
|
40
|
+
assert_equal "sector", conv_default( "#sector" )
|
41
|
+
assert_equal "adm1", conv_default( "#adm1" )
|
42
|
+
|
43
|
+
assert_equal "sector+en", conv_default( "#sector +en" )
|
44
|
+
assert_equal "adm1+code", conv_default( "#adm1 +code" )
|
45
|
+
|
46
|
+
assert_equal "affected+children+f", conv_default( "#affected +children +f" )
|
47
|
+
assert_equal "population+affected+children+m", conv_default( "#population +affected +children +m" )
|
48
|
+
end
|
49
|
+
|
50
|
+
|
51
|
+
def test_symbol
|
52
|
+
assert_equal :sector, conv_symbol( "#sector" )
|
53
|
+
assert_equal :adm1, conv_symbol( "#adm1" )
|
54
|
+
|
55
|
+
assert_equal :sector_en, conv_symbol( "#sector +en" )
|
56
|
+
assert_equal :adm1_code, conv_symbol( "#adm1 +code" )
|
57
|
+
|
58
|
+
assert_equal :affected_children_f, conv_symbol( "#affected +children +f" )
|
59
|
+
assert_equal :population_affected_children_m, conv_symbol( "#population +affected +children +m" )
|
60
|
+
end
|
61
|
+
|
62
|
+
|
63
|
+
end # class TestHeaderConverter
|
data/test/test_reader.rb
CHANGED
@@ -50,26 +50,99 @@ def txt
|
|
50
50
|
TXT
|
51
51
|
end
|
52
52
|
|
53
|
+
def txt2
|
54
|
+
<<TXT
|
55
|
+
%%%%%%%
|
56
|
+
% some comments here
|
57
|
+
% note: you can use blank lines and/or leading and trailing spaces
|
58
|
+
|
59
|
+
What, , , Who ,Where ,For whom,
|
60
|
+
Record, Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
61
|
+
|
62
|
+
, #sector+en, #subsector, #org, #country, #sex+#targeted, #sex+#targeted, #adm1
|
63
|
+
|
64
|
+
%%%
|
65
|
+
% more comments here
|
66
|
+
|
67
|
+
001, WASH, Subsector 1, Org 1, Country 1, 100, 100, Region 1
|
68
|
+
002, Health, Subsector 2, Org 2, Country 2, , , Region 2
|
69
|
+
003, Education, Subsector 3, Org 3, Country 2, 250, 300, Region 3
|
70
|
+
004, WASH, Subsector 4, Org 1, Country 3, 80, 95, Region 4
|
71
|
+
|
72
|
+
%%%
|
73
|
+
% some more comments and blank lines
|
74
|
+
|
75
|
+
|
76
|
+
TXT
|
77
|
+
end
|
78
|
+
|
79
|
+
def txt3
|
80
|
+
<<TXT
|
81
|
+
%%%%%%%%%%%%%%%%%
|
82
|
+
% use semicolon (;) as sep(arator)
|
83
|
+
|
84
|
+
What;;;Who;Where;For whom;
|
85
|
+
Record;Sector/Cluster;Subsector;Organisation;Country;Males;Females;Subregion
|
86
|
+
;#sector+en;#subsector;#org;#country;#sex+#targeted;#sex+#targeted;#adm1
|
87
|
+
001;WASH;Subsector 1;Org 1;Country 1;100;100;Region 1
|
88
|
+
002;Health;Subsector 2;Org 2;Country 2;;;Region 2
|
89
|
+
003;Education;Subsector 3;Org 3;Country 2;250;300;Region 3
|
90
|
+
004;WASH;Subsector 4;Org 1;Country 3;80;95;Region 4
|
91
|
+
TXT
|
92
|
+
end
|
93
|
+
|
94
|
+
|
53
95
|
|
54
|
-
def
|
96
|
+
def expected_recs2
|
97
|
+
[
|
98
|
+
{"sector+en" => "WASH",
|
99
|
+
"subsector" => "Subsector 1",
|
100
|
+
"org" => "Org 1",
|
101
|
+
"country" => "Country 1",
|
102
|
+
"sex+targeted" => [100, 100],
|
103
|
+
"adm1" => "Region 1"},
|
104
|
+
{"sector+en" => "Health",
|
105
|
+
"subsector" => "Subsector 2",
|
106
|
+
"org" => "Org 2",
|
107
|
+
"country" => "Country 2",
|
108
|
+
"sex+targeted" => [nil, nil],
|
109
|
+
"adm1" => "Region 2"},
|
110
|
+
{"sector+en" => "Education",
|
111
|
+
"subsector" => "Subsector 3",
|
112
|
+
"org" => "Org 3",
|
113
|
+
"country" => "Country 2",
|
114
|
+
"sex+targeted" => [250, 300],
|
115
|
+
"adm1" => "Region 3"},
|
116
|
+
{"sector+en" => "WASH",
|
117
|
+
"subsector" => "Subsector 4",
|
118
|
+
"org" => "Org 1",
|
119
|
+
"country" => "Country 3",
|
120
|
+
"sex+targeted" => [80, 95],
|
121
|
+
"adm1" => "Region 4"}]
|
122
|
+
end
|
123
|
+
|
124
|
+
|
125
|
+
def test_basics
|
55
126
|
csv = CsvHuman.new( recs )
|
56
127
|
csv.each do |rec|
|
57
128
|
pp rec
|
58
129
|
end
|
59
130
|
|
60
|
-
pp csv.read
|
61
|
-
|
62
131
|
assert_equal expected_recs, CsvHuman.parse( recs )
|
63
132
|
assert_equal expected_recs, CsvHuman.parse( recs2 )
|
64
133
|
|
65
|
-
|
66
134
|
CsvHuman.parse( recs ).each do |rec|
|
67
135
|
pp rec
|
68
136
|
end
|
69
137
|
|
70
138
|
|
71
139
|
pp CsvHuman.read( "#{CsvHuman.test_data_dir}/test.csv" )
|
72
|
-
|
140
|
+
|
141
|
+
|
142
|
+
assert_equal expected_recs2, CsvHuman.parse( txt )
|
143
|
+
assert_equal expected_recs2, CsvHuman.parse( txt2 )
|
144
|
+
|
145
|
+
|
73
146
|
CsvHuman.parse( txt ).each do |rec|
|
74
147
|
pp rec
|
75
148
|
end
|
@@ -79,4 +152,19 @@ def test_readme
|
|
79
152
|
end
|
80
153
|
end
|
81
154
|
|
155
|
+
|
156
|
+
def test_header_converter
|
157
|
+
pp CsvHuman.parse( txt2, :header_converter => :default )
|
158
|
+
pp CsvHuman.parse( txt2, :header_converter => :none )
|
159
|
+
pp CsvHuman.parse( txt2, :header_converter => :symbol )
|
160
|
+
|
161
|
+
pp CsvHuman.parse( txt2, header_converter: ->(value) { value.upcase } )
|
162
|
+
end
|
163
|
+
|
164
|
+
|
165
|
+
def test_semicolon
|
166
|
+
assert_equal expected_recs2, CsvHuman.parse( txt3, sep: ';' ) ## try with semicolon (;)
|
167
|
+
end
|
168
|
+
|
169
|
+
|
82
170
|
end # class TestReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvhuman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csvreader
|
@@ -68,12 +68,15 @@ files:
|
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
70
|
- lib/csvhuman.rb
|
71
|
+
- lib/csvhuman/base.rb
|
71
72
|
- lib/csvhuman/column.rb
|
73
|
+
- lib/csvhuman/converter.rb
|
72
74
|
- lib/csvhuman/reader.rb
|
73
75
|
- lib/csvhuman/tag.rb
|
74
76
|
- lib/csvhuman/version.rb
|
75
77
|
- test/data/test.csv
|
76
78
|
- test/helper.rb
|
79
|
+
- test/test_header_converter.rb
|
77
80
|
- test/test_reader.rb
|
78
81
|
- test/test_tags.rb
|
79
82
|
homepage: https://github.com/csvreader/csvhuman
|