csvhuman 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +3 -0
- data/README.md +213 -1
- data/lib/csvhuman.rb +2 -1
- data/lib/csvhuman/column.rb +89 -0
- data/lib/csvhuman/reader.rb +4 -69
- data/lib/csvhuman/tag.rb +162 -0
- data/lib/csvhuman/version.rb +1 -1
- data/test/test_reader.rb +24 -1
- data/test/test_tags.rb +106 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e03d4dc51acff7d6b47f1648abb47cfaa2a9028
|
4
|
+
data.tar.gz: b4921c44a67c57feae5c1f62eff5aa87ef81c996
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 675050a1e5af601ea6634fe17c0dcea511c917170438469c5f09349e4bd26678b5d42cc7cd5b9c97c2455b61ea67cd5719bff7c4637971849ae056955d562f2b
|
7
|
+
data.tar.gz: 9a7da3cdf466ebfec142344c505558b2c86fd38bee9c3b7d766c69cd0d127e5c42f4f854a6f61df7e4c57610ea4b2bec3bbdc121821a9c72b875cb470af2b50f
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -10,9 +10,221 @@ csvhuman library / gem - read tabular data in the CSV Humanitarian eXchange Lang
|
|
10
10
|
|
11
11
|
|
12
12
|
|
13
|
+
|
14
|
+
## What's Humanitarian eXchange Language (HXL)?
|
15
|
+
|
16
|
+
[Humanitarian eXchange Language (HXL)](https://github.com/csvspecs/csv-hxl)
|
17
|
+
is a (metadata) convention for
|
18
|
+
adding agreed-upon hashtags (e.g. `#org,#country,#sex+#targeted,#adm1`)
|
19
|
+
inline in a (single new line / row)
|
20
|
+
between the last header row and the first data row
|
21
|
+
for sharing tabular data across organisations
|
22
|
+
(during a humanitarian crisis).
|
23
|
+
Example:
|
24
|
+
|
25
|
+
|
26
|
+
```
|
27
|
+
What,,,Who,Where,For whom,
|
28
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
29
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
30
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
31
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
32
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
33
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
34
|
+
```
|
35
|
+
|
36
|
+
|
13
37
|
## Usage
|
14
38
|
|
15
|
-
to
|
39
|
+
Pass in an array of arrays (or a stream that responds to `#each` and yields one array of strings per row).
|
40
|
+
Example:
|
41
|
+
|
42
|
+
|
43
|
+
``` ruby
|
44
|
+
pp CsvHuman.parse( [["Organisation", "Cluster", "Province" ], ## or use HXL.parse
|
45
|
+
[ "#org", "#sector", "#adm1" ],
|
46
|
+
[ "Org A", "WASH", "Coastal Province" ],
|
47
|
+
[ "Org B", "Health", "Mountain Province" ],
|
48
|
+
[ "Org C", "Education", "Coastal Province" ],
|
49
|
+
[ "Org A", "WASH", "Plains Province" ]] )
|
50
|
+
```
|
51
|
+
|
52
|
+
resulting in:
|
53
|
+
|
54
|
+
``` ruby
|
55
|
+
[{"org" => "Org A", "sector" => "WASH", "adm1" => "Coastal Province"},
|
56
|
+
{"org" => "Org B", "sector" => "Health", "adm1" => "Mountain Province"},
|
57
|
+
{"org" => "Org C", "sector" => "Education", "adm1" => "Coastal Province"},
|
58
|
+
{"org" => "Org A", "sector" => "WASH", "adm1" => "Plains Province"}]
|
59
|
+
```
|
60
|
+
|
61
|
+
Or pass in the text. Example:
|
62
|
+
|
63
|
+
``` ruby
|
64
|
+
pp CsvHuman.parse( <<TXT ) ## or use HXL.parse
|
65
|
+
What,,,Who,Where,For whom,
|
66
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
67
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
68
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
69
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
70
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
71
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
72
|
+
TXT
|
73
|
+
```
|
74
|
+
|
75
|
+
resulting in:
|
76
|
+
|
77
|
+
``` ruby
|
78
|
+
[{"sector+en" => "WASH",
|
79
|
+
"subsector" => "Subsector 1",
|
80
|
+
"org" => "Org 1",
|
81
|
+
"country" => "Country 1",
|
82
|
+
"sex+targeted" => ["100", "100"],
|
83
|
+
"adm1" => "Region 1"},
|
84
|
+
{"sector+en" => "Health",
|
85
|
+
"subsector" => "Subsector 2",
|
86
|
+
"org" => "Org 2",
|
87
|
+
"country" => "Country 2",
|
88
|
+
"sex+targeted" => ["", ""],
|
89
|
+
"adm1" => "Region 2"},
|
90
|
+
{"sector+en" => "Education",
|
91
|
+
"subsector" => "Subsector 3",
|
92
|
+
"org" => "Org 3",
|
93
|
+
"country" => "Country 2",
|
94
|
+
"sex+targeted" => ["250", "300"],
|
95
|
+
"adm1" => "Region 3"},
|
96
|
+
{"sector+en" => "WASH",
|
97
|
+
"subsector" => "Subsector 4",
|
98
|
+
"org" => "Org 1",
|
99
|
+
"country" => "Country 3",
|
100
|
+
"sex+targeted" => ["80", "95"],
|
101
|
+
"adm1" => "Region 4"}]
|
102
|
+
```
|
103
|
+
|
104
|
+
|
105
|
+
More ways to use the reader:
|
106
|
+
|
107
|
+
``` ruby
|
108
|
+
csv = CsvHuman.new( recs )
|
109
|
+
csv.each do |rec|
|
110
|
+
pp rec
|
111
|
+
end
|
112
|
+
|
113
|
+
pp csv.read
|
114
|
+
|
115
|
+
|
116
|
+
CsvHuman.parse( recs ).each do |rec|
|
117
|
+
pp rec
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
pp CsvHuman.read( "./test.csv" )
|
122
|
+
|
123
|
+
CsvHuman.foreach( "./test.csv" ) do |rec|
|
124
|
+
pp rec
|
125
|
+
end
|
126
|
+
|
127
|
+
#...
|
128
|
+
|
129
|
+
```
|
130
|
+
|
131
|
+
or use the `HXL` alias:
|
132
|
+
|
133
|
+
``` ruby
|
134
|
+
hxl = HXL.new( recs )
|
135
|
+
hxl.each do |rec|
|
136
|
+
pp rec
|
137
|
+
end
|
138
|
+
|
139
|
+
pp hxl.read
|
140
|
+
|
141
|
+
|
142
|
+
HXL.parse( recs ).each do |rec|
|
143
|
+
pp rec
|
144
|
+
end
|
145
|
+
|
146
|
+
|
147
|
+
pp HXL.read( "./test.csv" )
|
148
|
+
|
149
|
+
HXL.foreach( "./test.csv" ) do |rec|
|
150
|
+
pp rec
|
151
|
+
end
|
152
|
+
|
153
|
+
#...
|
154
|
+
```
|
155
|
+
|
156
|
+
Note: More aliases for `CsvHuman`, `HXL`? Yes, you can use
|
157
|
+
`CsvHum`, `CSV_HXL`, `CSVHXL` too.
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
## Tag Helpers
|
164
|
+
|
165
|
+
**Normalize**. Use `CsvHuman::Tag.normalize` to pretty print or normalize a tag.
|
166
|
+
All parts get downcased (lowercased), all attributes get sorted a-to-z,
|
167
|
+
all extra or missing hashtags or pluses get added or removed, and
|
168
|
+
all extra or missing spaces added or removed. Example:
|
169
|
+
|
170
|
+
``` ruby
|
171
|
+
HXL::Tag.normalize( "#sector+en" )
|
172
|
+
# => "#sector +en"
|
173
|
+
HXL::Tag.normalize( "#SECTOR EN" )
|
174
|
+
# => "#sector +en"
|
175
|
+
HXL::Tag.normalize( "# SECTOR + #EN " )
|
176
|
+
# => "#sector +en"
|
177
|
+
HXL::Tag.normalize( "SECTOR EN" )
|
178
|
+
# => "#sector +en"
|
179
|
+
# ...
|
180
|
+
```
|
181
|
+
|
182
|
+
|
183
|
+
**Split**. Use `CsvHuman::Tag.split` to split (and normalize) a tag into its parts.
|
184
|
+
Example:
|
185
|
+
|
186
|
+
``` ruby
|
187
|
+
HXL::Tag.split( "#sector+en" )
|
188
|
+
# => ["sector", "en"]
|
189
|
+
HXL::Tag.split( "#SECTOR EN" )
|
190
|
+
# => ["sector", "en"]
|
191
|
+
HXL::Tag.split( "# SECTOR + #EN " )
|
192
|
+
# => ["sector", "en"]
|
193
|
+
HXL::Tag.split( "SECTOR EN" )
|
194
|
+
# => ["sector", "en"]
|
195
|
+
|
196
|
+
## sort attributes a-to-z
|
197
|
+
HXL::Tag.split( "#affected +f +children" )
|
198
|
+
# => ["affected", "children", "f"]
|
199
|
+
HXL::Tag.split( "#population +children +affected +m" )
|
200
|
+
# => ["population", "affected", "children", "m"]
|
201
|
+
HXL::Tag.split( "#population+children+affected+m" )
|
202
|
+
# => ["population", "affected", "children", "m"]
|
203
|
+
HXL::Tag.split( "#population+#children+#affected+#m" )
|
204
|
+
# => ["population", "affected", "children", "m"]
|
205
|
+
HXL::Tag.split( "#population #children #affected #m" )
|
206
|
+
# => ["population", "affected", "children", "m"]
|
207
|
+
HXL::Tag.split( "POPULATION CHILDREN AFFECTED M" )
|
208
|
+
# => ["population", "affected", "children", "m"]
|
209
|
+
#...
|
210
|
+
```
|
211
|
+
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
## Frequently Asked Questions (FAQ) and Answers
|
216
|
+
|
217
|
+
|
218
|
+
### Q: How to deal with un-tagged fields?
|
219
|
+
|
220
|
+
**A**: Un-tagged fields get skipped / ignored.
|
221
|
+
|
222
|
+
|
223
|
+
### Q: How to deal with duplicate / repeated fields (e.g. `#sex+#targeted,#sex+#targeted`)?
|
224
|
+
|
225
|
+
**A**: Repeated fields (auto-magically) get turned into an array / list.
|
226
|
+
|
227
|
+
|
16
228
|
|
17
229
|
|
18
230
|
## License
|
data/lib/csvhuman.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'pp'
|
4
|
-
require 'logger'
|
5
4
|
|
6
5
|
|
7
6
|
require 'csvreader'
|
8
7
|
|
9
8
|
## our own code
|
10
9
|
require 'csvhuman/version' # note: let version always go first
|
10
|
+
require 'csvhuman/tag'
|
11
|
+
require 'csvhuman/column'
|
11
12
|
require 'csvhuman/reader'
|
12
13
|
|
13
14
|
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman
|
5
|
+
|
6
|
+
|
7
|
+
class Columns
|
8
|
+
|
9
|
+
|
10
|
+
def self.build( values )
|
11
|
+
|
12
|
+
## "clean" unify/normalize names
|
13
|
+
tag_keys = values.map do |value|
|
14
|
+
if value
|
15
|
+
if value.empty?
|
16
|
+
nil
|
17
|
+
else
|
18
|
+
## e.g. #ADM1 CODE => #adm1 +code
|
19
|
+
## POPULATION F CHILDREN AFFECTED => #population +affected +children +f
|
20
|
+
value = Tag.normalize( value )
|
21
|
+
## turn empty normalized tags (e.g. "stray" hashtag) into nil too
|
22
|
+
value = nil if value.empty?
|
23
|
+
value
|
24
|
+
end
|
25
|
+
else # keep (nil) as is
|
26
|
+
nil
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
counts = {}
|
32
|
+
tag_keys.each_with_index do |key,i|
|
33
|
+
if key
|
34
|
+
counts[key] ||= []
|
35
|
+
counts[key] << i
|
36
|
+
end
|
37
|
+
end
|
38
|
+
## puts "counts:"
|
39
|
+
## pp counts
|
40
|
+
|
41
|
+
## create all unique tags
|
42
|
+
tags = {}
|
43
|
+
counts.each_key do |key|
|
44
|
+
tags[key] = Tag.parse( key )
|
45
|
+
end
|
46
|
+
## puts "tags:"
|
47
|
+
## pp tags
|
48
|
+
|
49
|
+
|
50
|
+
cols = []
|
51
|
+
tag_keys.each do |key|
|
52
|
+
if key
|
53
|
+
count = counts[key]
|
54
|
+
tag = tags[key] ## note: "reuse" tag for all columns if list
|
55
|
+
if count.size > 1
|
56
|
+
## note: defaults to use "standard/default" tag key (as a string)
|
57
|
+
cols << Column.new( tag.key, tag, list: true )
|
58
|
+
else
|
59
|
+
cols << Column.new( tag.key, tag )
|
60
|
+
end
|
61
|
+
else
|
62
|
+
cols << Column.new
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
cols
|
67
|
+
end
|
68
|
+
end ## class Columns
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
class Column
|
74
|
+
attr_reader :key # used for record (record key); note: list columns must use the same key
|
75
|
+
attr_reader :tag
|
76
|
+
|
77
|
+
|
78
|
+
def initialize( key=nil, tag=nil, list: false )
|
79
|
+
@key = key
|
80
|
+
@tag = tag
|
81
|
+
@list = list
|
82
|
+
end
|
83
|
+
|
84
|
+
|
85
|
+
def tagged?() @tag.nil? == false; end
|
86
|
+
def list?() @list; end
|
87
|
+
end # class Column
|
88
|
+
|
89
|
+
end # class CsvHuman
|
data/lib/csvhuman/reader.rb
CHANGED
@@ -65,21 +65,6 @@ class CsvHuman
|
|
65
65
|
|
66
66
|
|
67
67
|
|
68
|
-
|
69
|
-
class Column
|
70
|
-
attr_reader :tag
|
71
|
-
|
72
|
-
def initialize( tag=nil, list: false )
|
73
|
-
@tag = tag
|
74
|
-
@list = list
|
75
|
-
end
|
76
|
-
|
77
|
-
def tagged?() @tag.nil? == false; end
|
78
|
-
def list?() @list; end
|
79
|
-
end # class Column
|
80
|
-
|
81
|
-
|
82
|
-
|
83
68
|
attr_reader :header, :tags
|
84
69
|
|
85
70
|
def initialize( recs_or_stream )
|
@@ -106,8 +91,8 @@ def each( &block )
|
|
106
91
|
@recs.each do |values|
|
107
92
|
## pp values
|
108
93
|
if @cols.nil?
|
109
|
-
if values.any? { |value| value && value.start_with?('#') }
|
110
|
-
@cols =
|
94
|
+
if values.any? { |value| value && value.strip.start_with?('#') }
|
95
|
+
@cols = Columns.build( values )
|
111
96
|
@tags = values
|
112
97
|
else
|
113
98
|
@header << values
|
@@ -119,8 +104,8 @@ def each( &block )
|
|
119
104
|
record = {}
|
120
105
|
@cols.each_with_index do |col,i|
|
121
106
|
if col.tagged?
|
122
|
-
key = col.
|
123
|
-
value = values[i]
|
107
|
+
key = col.key
|
108
|
+
value = values[i] ## todo/fix: use col.tag.typecast( values[i] )
|
124
109
|
if col.list?
|
125
110
|
record[ key ] ||= []
|
126
111
|
record[ key ] << value
|
@@ -144,54 +129,4 @@ def read() to_a; end # method read
|
|
144
129
|
## add closed? and close
|
145
130
|
## if self.open used without block (user needs to close file "manually")
|
146
131
|
|
147
|
-
|
148
|
-
####
|
149
|
-
# helpers
|
150
|
-
|
151
|
-
|
152
|
-
def build_cols( values )
|
153
|
-
|
154
|
-
## "clean" unify/normalize names
|
155
|
-
values = values.map do |value|
|
156
|
-
if value
|
157
|
-
if value.empty?
|
158
|
-
nil ## make untagged fields nil
|
159
|
-
else
|
160
|
-
## todo: sort attributes by a-to-z
|
161
|
-
## strip / remove all spaces
|
162
|
-
value.strip.gsub('#','') ## remove leading # - why? why not?
|
163
|
-
end
|
164
|
-
else
|
165
|
-
value ## keep (nil) as is
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
|
-
|
170
|
-
counts = {}
|
171
|
-
values.each_with_index do |value,i|
|
172
|
-
if value
|
173
|
-
counts[value] ||= []
|
174
|
-
counts[value] << i
|
175
|
-
end
|
176
|
-
end
|
177
|
-
## pp counts
|
178
|
-
|
179
|
-
|
180
|
-
cols = []
|
181
|
-
values.each do |value|
|
182
|
-
if value
|
183
|
-
count = counts[value]
|
184
|
-
if count.size > 1
|
185
|
-
cols << Column.new( value, list: true )
|
186
|
-
else
|
187
|
-
cols << Column.new( value )
|
188
|
-
end
|
189
|
-
else
|
190
|
-
cols << Column.new
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
cols
|
195
|
-
end
|
196
|
-
|
197
132
|
end # class CsvHuman
|
data/lib/csvhuman/tag.rb
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvHuman
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
class Tag
|
8
|
+
|
9
|
+
## 1) plus (with optional hashtag and/or optional leading and trailing spaces)
|
10
|
+
## 2) hashtag (with optional leading and trailing spaces)
|
11
|
+
## 3) spaces only (not followed by plus) or
|
12
|
+
## note: plus pattern must go first (otherwise "sector + en" becomes ["sector", "", "en"])
|
13
|
+
SEP_REGEX = /(?: \s*\++
|
14
|
+
(?:\s*\#+)?
|
15
|
+
\s* )
|
16
|
+
|
|
17
|
+
(?: \s*\#+\s* )
|
18
|
+
|
|
19
|
+
(?: \s+)
|
20
|
+
/x ## check if \s includes space AND tab?
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
def self.split( value )
|
25
|
+
value = value.strip
|
26
|
+
value = value.downcase
|
27
|
+
while value.start_with?('#') do ## allow one or more hashes
|
28
|
+
value = value[1..-1] ## remove leading #
|
29
|
+
value = value.strip ## strip (optional) leading spaces (again)
|
30
|
+
end
|
31
|
+
## pp value
|
32
|
+
parts = value.split( SEP_REGEX )
|
33
|
+
|
34
|
+
## sort attributes a-z
|
35
|
+
if parts.size > 2
|
36
|
+
[parts[0]] + parts[1..-1].sort
|
37
|
+
else
|
38
|
+
parts
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
def self.normalize( value ) ## todo: rename to pretty or something or add alias
|
44
|
+
parts = split( value )
|
45
|
+
name = parts[0]
|
46
|
+
attributes = parts[1..-1] ## note: might be nil
|
47
|
+
|
48
|
+
buf = ''
|
49
|
+
if name ## note: name might be nil too e.g. value = "" or value = " "
|
50
|
+
buf << '#' + name
|
51
|
+
if attributes && attributes.size > 0
|
52
|
+
buf << ' +'
|
53
|
+
buf << attributes.join(' +')
|
54
|
+
end
|
55
|
+
end
|
56
|
+
buf
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
def self.guess_type( name, attributes )
|
61
|
+
|
62
|
+
if name == 'date'
|
63
|
+
Date
|
64
|
+
elsif ['affected', 'inneed'].include?( name )
|
65
|
+
Integer
|
66
|
+
else
|
67
|
+
## check attributes
|
68
|
+
if attributes.nil? || attributes.empty?
|
69
|
+
String ## assume (default to) string
|
70
|
+
elsif attributes.include?( 'num' )
|
71
|
+
Integer
|
72
|
+
elsif attributes.include?( 'date' ) ### todo/check: exists +date?
|
73
|
+
Date
|
74
|
+
elsif attributes.include?( 'affected' )
|
75
|
+
Integer
|
76
|
+
else
|
77
|
+
String ## assume (default to) string
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
def self.parse( value )
|
85
|
+
parts = split( value )
|
86
|
+
|
87
|
+
name = parts[0]
|
88
|
+
attributes = parts[1..-1] ## todo/fix: check if nil (make it empty array [] always) - why? why not?
|
89
|
+
type = guess_type( name, attributes )
|
90
|
+
|
91
|
+
new( name, attributes, type )
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
attr_reader :name
|
98
|
+
attr_reader :attributes ## use attribs or something shorter - why? why not?
|
99
|
+
attr_reader :type
|
100
|
+
|
101
|
+
def initialize( name, attributes=nil, type=String )
|
102
|
+
@name = name
|
103
|
+
## sorted a-z - note: make sure attributes is [] NOT nil if empty - why? why not?
|
104
|
+
@attributes = attributes || []
|
105
|
+
@type = type ## type class (defaults to String)
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
def key
|
110
|
+
## convenience short cut for "standard/default" string key
|
111
|
+
## cache/pre-built/memoize - why? why not?
|
112
|
+
## builds:
|
113
|
+
## population+affected+children+f
|
114
|
+
|
115
|
+
buf = ''
|
116
|
+
buf << @name
|
117
|
+
if @attributes && @attributes.size > 0
|
118
|
+
buf << '+'
|
119
|
+
buf << @attributes.join('+')
|
120
|
+
end
|
121
|
+
buf
|
122
|
+
end
|
123
|
+
|
124
|
+
def to_s
|
125
|
+
## cache/pre-built/memoize - why? why not?
|
126
|
+
##
|
127
|
+
## builds
|
128
|
+
## #population +affected +children +f
|
129
|
+
|
130
|
+
buf = ''
|
131
|
+
buf << '#' + @name
|
132
|
+
if @attributes && @attributes.size > 0
|
133
|
+
buf << ' +'
|
134
|
+
buf << @attributes.join(' +')
|
135
|
+
end
|
136
|
+
buf
|
137
|
+
end
|
138
|
+
|
139
|
+
|
140
|
+
def typecast( value ) ## use convert or call - why? why not?
|
141
|
+
if @type == Integer
|
142
|
+
conv_to_i( value )
|
143
|
+
else ## assume String
|
144
|
+
# pass through as is
|
145
|
+
value
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
private
|
150
|
+
def conv_to_i( value )
|
151
|
+
if value.nil? || value.empty?
|
152
|
+
nil ## return nil - why? why not?
|
153
|
+
else
|
154
|
+
Integer( value )
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
|
159
|
+
end # class Tag
|
160
|
+
|
161
|
+
|
162
|
+
end # class CsvHuman
|
data/lib/csvhuman/version.rb
CHANGED
data/test/test_reader.rb
CHANGED
@@ -18,6 +18,26 @@ def recs
|
|
18
18
|
[ "Org A", "WASH", "Plains Province" ]]
|
19
19
|
end
|
20
20
|
|
21
|
+
def recs2
|
22
|
+
[["Organisation", "Cluster", "Province" ],
|
23
|
+
[ "ORG", "#SECTOR", "ADM1" ],
|
24
|
+
[ "Org A", "WASH", "Coastal Province" ],
|
25
|
+
[ "Org B", "Health", "Mountain Province" ],
|
26
|
+
[ "Org C", "Education", "Coastal Province" ],
|
27
|
+
[ "Org A", "WASH", "Plains Province" ]]
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
def expected_recs
|
32
|
+
[{"org"=>"Org A", "sector"=>"WASH", "adm1"=>"Coastal Province"},
|
33
|
+
{"org"=>"Org B", "sector"=>"Health", "adm1"=>"Mountain Province"},
|
34
|
+
{"org"=>"Org C", "sector"=>"Education", "adm1"=>"Coastal Province"},
|
35
|
+
{"org"=>"Org A", "sector"=>"WASH", "adm1"=>"Plains Province"}]
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
|
21
41
|
def txt
|
22
42
|
<<TXT
|
23
43
|
What,,,Who,Where,For whom,
|
@@ -38,7 +58,10 @@ def test_readme
|
|
38
58
|
end
|
39
59
|
|
40
60
|
pp csv.read
|
41
|
-
|
61
|
+
|
62
|
+
assert_equal expected_recs, CsvHuman.parse( recs )
|
63
|
+
assert_equal expected_recs, CsvHuman.parse( recs2 )
|
64
|
+
|
42
65
|
|
43
66
|
CsvHuman.parse( recs ).each do |rec|
|
44
67
|
pp rec
|
data/test/test_tags.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_tags.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestTags < MiniTest::Test
|
11
|
+
|
12
|
+
def split( value )
|
13
|
+
CsvHuman::Tag.split( value ) ## returns an array of strings (name+attributes[])
|
14
|
+
end
|
15
|
+
|
16
|
+
def normalize( value )
|
17
|
+
CsvHuman::Tag.normalize( value ) ## returns a string
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse( value )
|
21
|
+
CsvHuman::Tag.parse( value ) ## returns a Tag class
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
def test_split
|
27
|
+
assert_equal [], split( "" ) # empty
|
28
|
+
assert_equal [], split( " " ) # empty
|
29
|
+
|
30
|
+
## more empties (all matched by separator regex/pattern)
|
31
|
+
## keep as empty - why? why not?
|
32
|
+
assert_equal [], split( " # " ) # empty
|
33
|
+
assert_equal [], split( " ## " ) # empty
|
34
|
+
assert_equal [], split( " + " ) # empty
|
35
|
+
assert_equal [], split( " +++ " ) # empty
|
36
|
+
assert_equal [], split( " +++## " ) # empty
|
37
|
+
|
38
|
+
|
39
|
+
assert_equal ["sector", "en"], split( "#sector+en" )
|
40
|
+
assert_equal ["sector", "en"], split( "#SECTOR EN" )
|
41
|
+
assert_equal ["sector", "en"], split( " # SECTOR + EN " )
|
42
|
+
assert_equal ["sector", "en"], split( "SeCtOr en" )
|
43
|
+
assert_equal ["sector", "en"], split( "#sector#en" )
|
44
|
+
assert_equal ["sector", "en"], split( "#sector+#en" ) ## allow (optional) hash for attributes
|
45
|
+
assert_equal ["sector", "en"], split( "##sector#en" ) ## allow hash only for attributes
|
46
|
+
assert_equal ["sector", "en"], split( "# #sector+++ ##en" ) ## allow one or more plus or hashes (typos) for attibutes
|
47
|
+
|
48
|
+
|
49
|
+
assert_equal ["adm1", "code"], split( "#ADM1 +CODE" )
|
50
|
+
assert_equal ["adm1", "code"], split( " # ADM1 + CODE" )
|
51
|
+
assert_equal ["adm1", "code"], split( "ADM1 CODE" )
|
52
|
+
|
53
|
+
## sort attributes a-to-z
|
54
|
+
assert_equal ["affected", "children", "f"], split( "#affected +f +children" )
|
55
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population +children +affected +m" )
|
56
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population+children+affected+m" )
|
57
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population+#children+#affected+#m" )
|
58
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population #children #affected #m" )
|
59
|
+
assert_equal ["population", "affected", "children", "m"], split( "POPULATION CHILDREN AFFECTED M" )
|
60
|
+
end
|
61
|
+
|
62
|
+
|
63
|
+
def test_normalize
|
64
|
+
assert_equal "", normalize( "" ) # empty
|
65
|
+
assert_equal "", normalize( " " ) # empty
|
66
|
+
|
67
|
+
assert_equal "#sector +en", normalize( "#sector+en" )
|
68
|
+
assert_equal "#sector +en", normalize( "#SECTOR EN" )
|
69
|
+
assert_equal "#sector +en", normalize( " # SECTOR + EN " )
|
70
|
+
assert_equal "#sector +en", normalize( " # SECTOR # EN " )
|
71
|
+
assert_equal "#sector +en", normalize( "SeCToR en" )
|
72
|
+
|
73
|
+
assert_equal "#adm1 +code", normalize( "#ADM1 +CODE" )
|
74
|
+
assert_equal "#adm1 +code", normalize( " # ADM1 + CODE" )
|
75
|
+
assert_equal "#adm1 +code", normalize( " # ADM1 + #CODE" )
|
76
|
+
assert_equal "#adm1 +code", normalize( "ADM1 Code" )
|
77
|
+
|
78
|
+
## sort attributes a-to-z
|
79
|
+
assert_equal "#affected +children +f", normalize( "#affected +f +children" )
|
80
|
+
assert_equal "#population +affected +children +m", normalize( "#population +children +affected +m" )
|
81
|
+
assert_equal "#population +affected +children +m", normalize( "#population+children+affected+m" )
|
82
|
+
assert_equal "#population +affected +children +m", normalize( "POPULATION CHILDREN AFFECTED M" )
|
83
|
+
end
|
84
|
+
|
85
|
+
|
86
|
+
def test_parse
|
87
|
+
tag = parse( "#sector+en" )
|
88
|
+
assert_equal "#sector +en", tag.to_s
|
89
|
+
assert_equal "sector", tag.name
|
90
|
+
assert_equal ["en"], tag.attributes
|
91
|
+
assert_equal String, tag.type
|
92
|
+
|
93
|
+
assert_equal "#sector +en", parse( "#SECTOR EN" ).to_s
|
94
|
+
assert_equal "#sector +en", parse( " # SECTOR + EN " ).to_s
|
95
|
+
|
96
|
+
|
97
|
+
tag = parse( "#adm1" )
|
98
|
+
assert_equal "#adm1", tag.to_s
|
99
|
+
assert_equal "adm1", tag.name
|
100
|
+
assert_equal [], tag.attributes
|
101
|
+
assert_equal String, tag.type
|
102
|
+
|
103
|
+
assert_equal "#adm1", parse( "ADM1" ).to_s
|
104
|
+
end
|
105
|
+
|
106
|
+
end # class TestTags
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvhuman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csvreader
|
@@ -68,11 +68,14 @@ files:
|
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
70
|
- lib/csvhuman.rb
|
71
|
+
- lib/csvhuman/column.rb
|
71
72
|
- lib/csvhuman/reader.rb
|
73
|
+
- lib/csvhuman/tag.rb
|
72
74
|
- lib/csvhuman/version.rb
|
73
75
|
- test/data/test.csv
|
74
76
|
- test/helper.rb
|
75
77
|
- test/test_reader.rb
|
78
|
+
- test/test_tags.rb
|
76
79
|
homepage: https://github.com/csvreader/csvhuman
|
77
80
|
licenses:
|
78
81
|
- Public Domain
|