csvhuman 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +3 -0
- data/README.md +213 -1
- data/lib/csvhuman.rb +2 -1
- data/lib/csvhuman/column.rb +89 -0
- data/lib/csvhuman/reader.rb +4 -69
- data/lib/csvhuman/tag.rb +162 -0
- data/lib/csvhuman/version.rb +1 -1
- data/test/test_reader.rb +24 -1
- data/test/test_tags.rb +106 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e03d4dc51acff7d6b47f1648abb47cfaa2a9028
|
4
|
+
data.tar.gz: b4921c44a67c57feae5c1f62eff5aa87ef81c996
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 675050a1e5af601ea6634fe17c0dcea511c917170438469c5f09349e4bd26678b5d42cc7cd5b9c97c2455b61ea67cd5719bff7c4637971849ae056955d562f2b
|
7
|
+
data.tar.gz: 9a7da3cdf466ebfec142344c505558b2c86fd38bee9c3b7d766c69cd0d127e5c42f4f854a6f61df7e4c57610ea4b2bec3bbdc121821a9c72b875cb470af2b50f
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -10,9 +10,221 @@ csvhuman library / gem - read tabular data in the CSV Humanitarian eXchange Lang
|
|
10
10
|
|
11
11
|
|
12
12
|
|
13
|
+
|
14
|
+
## What's Humanitarian eXchange Language (HXL)?
|
15
|
+
|
16
|
+
[Humanitarian eXchange Language (HXL)](https://github.com/csvspecs/csv-hxl)
|
17
|
+
is a (metadata) convention for
|
18
|
+
adding agreed-upon hashtags (e.g. `#org,#country,#sex+#targeted,#adm1`)
|
19
|
+
inline in a (single new line / row)
|
20
|
+
between the last header row and the first data row
|
21
|
+
for sharing tabular data across organisations
|
22
|
+
(during a humanitarian crisis).
|
23
|
+
Example:
|
24
|
+
|
25
|
+
|
26
|
+
```
|
27
|
+
What,,,Who,Where,For whom,
|
28
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
29
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
30
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
31
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
32
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
33
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
34
|
+
```
|
35
|
+
|
36
|
+
|
13
37
|
## Usage
|
14
38
|
|
15
|
-
to
|
39
|
+
Pass in an array of arrays (or a stream responding to `#each` with an array of strings).
|
40
|
+
Example:
|
41
|
+
|
42
|
+
|
43
|
+
``` ruby
|
44
|
+
pp CsvHuman.parse( [["Organisation", "Cluster", "Province" ], ## or use HXL.parse
|
45
|
+
[ "#org", "#sector", "#adm1" ],
|
46
|
+
[ "Org A", "WASH", "Coastal Province" ],
|
47
|
+
[ "Org B", "Health", "Mountain Province" ],
|
48
|
+
[ "Org C", "Education", "Coastal Province" ],
|
49
|
+
[ "Org A", "WASH", "Plains Province" ]] )
|
50
|
+
```
|
51
|
+
|
52
|
+
resulting in:
|
53
|
+
|
54
|
+
``` ruby
|
55
|
+
[{"org" => "Org A", "sector" => "WASH", "adm1" => "Coastal Province"},
|
56
|
+
{"org" => "Org B", "sector" => "Health", "adm1" => "Mountain Province"},
|
57
|
+
{"org" => "Org C", "sector" => "Education", "adm1" => "Coastal Province"},
|
58
|
+
{"org" => "Org A", "sector" => "WASH", "adm1" => "Plains Province"}]
|
59
|
+
```
|
60
|
+
|
61
|
+
Or pass in the text. Example:
|
62
|
+
|
63
|
+
``` ruby
|
64
|
+
pp CsvHuman.parse( <<TXT ) ## or use HXL.parse
|
65
|
+
What,,,Who,Where,For whom,
|
66
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
67
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
68
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
69
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
70
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
71
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
72
|
+
TXT
|
73
|
+
```
|
74
|
+
|
75
|
+
resulting in:
|
76
|
+
|
77
|
+
```
|
78
|
+
[{"sector+en" => "WASH",
|
79
|
+
"subsector" => "Subsector 1",
|
80
|
+
"org" => "Org 1",
|
81
|
+
"country" => "Country 1",
|
82
|
+
"sex+targeted" => ["100", "100"],
|
83
|
+
"adm1" => "Region 1"},
|
84
|
+
{"sector+en" => "Health",
|
85
|
+
"subsector" => "Subsector 2",
|
86
|
+
"org" => "Org 2",
|
87
|
+
"country" => "Country 2",
|
88
|
+
"sex+targeted" => ["", ""],
|
89
|
+
"adm1" => "Region 2"},
|
90
|
+
{"sector+en" => "Education",
|
91
|
+
"subsector" => "Subsector 3",
|
92
|
+
"org" => "Org 3",
|
93
|
+
"country" => "Country 2",
|
94
|
+
"sex+targeted" => ["250", "300"],
|
95
|
+
"adm1" => "Region 3"},
|
96
|
+
{"sector+en" => "WASH",
|
97
|
+
"subsector" => "Subsector 4",
|
98
|
+
"org" => "Org 1",
|
99
|
+
"country" => "Country 3",
|
100
|
+
"sex+targeted" => ["80", "95"],
|
101
|
+
"adm1" => "Region 4"}]
|
102
|
+
```
|
103
|
+
|
104
|
+
|
105
|
+
More ways to use the reader:
|
106
|
+
|
107
|
+
``` ruby
|
108
|
+
csv = CsvHuman.new( recs )
|
109
|
+
csv.each do |rec|
|
110
|
+
pp rec
|
111
|
+
end
|
112
|
+
|
113
|
+
pp csv.read
|
114
|
+
|
115
|
+
|
116
|
+
CsvHuman.parse( recs ).each do |rec|
|
117
|
+
pp rec
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
pp CsvHuman.read( "./test.csv" )
|
122
|
+
|
123
|
+
CsvHuman.foreach( "./test.csv" ) do |rec|
|
124
|
+
pp rec
|
125
|
+
end
|
126
|
+
|
127
|
+
#...
|
128
|
+
|
129
|
+
```
|
130
|
+
|
131
|
+
or use the `HXL` alias:
|
132
|
+
|
133
|
+
``` ruby
|
134
|
+
hxl = HXL.new( recs )
|
135
|
+
hxl.each do |rec|
|
136
|
+
pp rec
|
137
|
+
end
|
138
|
+
|
139
|
+
pp hxl.read
|
140
|
+
|
141
|
+
|
142
|
+
HXL.parse( recs ).each do |rec|
|
143
|
+
pp rec
|
144
|
+
end
|
145
|
+
|
146
|
+
|
147
|
+
pp HXL.read( "./test.csv" )
|
148
|
+
|
149
|
+
HXL.foreach( "./test.csv" ) do |rec|
|
150
|
+
pp rec
|
151
|
+
end
|
152
|
+
|
153
|
+
#...
|
154
|
+
```
|
155
|
+
|
156
|
+
Note: More aliases for `CsvHuman`, `HXL`? Yes, you can use
|
157
|
+
`CsvHum`, `CSV_HXL`, `CSVHXL` too.
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
## Tag Helpers
|
164
|
+
|
165
|
+
**Normalize**. Use `CsvHuman::Tag.normalize` to pretty print or normalize a tag.
|
166
|
+
All parts get downcased (lowercased), all attributes get sorted a-to-z,
|
167
|
+
all extra or missing hashtags or pluses added or removed, and
|
168
|
+
all extra or missing spaces added or removed. Example:
|
169
|
+
|
170
|
+
``` ruby
|
171
|
+
HXL::Tag.normalize( "#sector+en" )
|
172
|
+
# => "#sector +en"
|
173
|
+
HXL::Tag.normalize( "#SECTOR EN" )
|
174
|
+
# => "#sector +en"
|
175
|
+
HXL::Tag.normalize( "# SECTOR + #EN " )
|
176
|
+
# => "#sector +en"
|
177
|
+
HXL::Tag.normalize( "SECTOR EN" )
|
178
|
+
# => "#sector +en"
|
179
|
+
# ...
|
180
|
+
```
|
181
|
+
|
182
|
+
|
183
|
+
**Split**. Use `CsvHuman::Tag.split` to split (and normalize) a tag into its parts.
|
184
|
+
Example:
|
185
|
+
|
186
|
+
``` ruby
|
187
|
+
HXL::Tag.split( "#sector+en" )
|
188
|
+
# => ["sector", "en"]
|
189
|
+
HXL::Tag.split( "#SECTOR EN" )
|
190
|
+
# => ["sector", "en"]
|
191
|
+
HXL::Tag.split( "# SECTOR + #EN " )
|
192
|
+
# => ["sector", "en"]
|
193
|
+
HXL::Tag.split( "SECTOR EN" )
|
194
|
+
# => ["sector", "en"]
|
195
|
+
|
196
|
+
## sort attributes a-to-z
|
197
|
+
HXL::Tag.split( "#affected +f +children" )
|
198
|
+
# => ["affected", "children", "f"]
|
199
|
+
HXL::Tag.split( "#population +children +affected +m" )
|
200
|
+
# => ["population", "affected", "children", "m"]
|
201
|
+
HXL::Tag.split( "#population+children+affected+m" )
|
202
|
+
# => ["population", "affected", "children", "m"]
|
203
|
+
HXL::Tag.split( "#population+#children+#affected+#m" )
|
204
|
+
# => ["population", "affected", "children", "m"]
|
205
|
+
HXL::Tag.split( "#population #children #affected #m" )
|
206
|
+
# => ["population", "affected", "children", "m"]
|
207
|
+
HXL::Tag.split( "POPULATION CHILDREN AFFECTED M" )
|
208
|
+
# => ["population", "affected", "children", "m"]
|
209
|
+
#...
|
210
|
+
```
|
211
|
+
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
## Frequently Asked Questions (FAQ) and Answers
|
216
|
+
|
217
|
+
|
218
|
+
### Q: How to deal with un-tagged fields?
|
219
|
+
|
220
|
+
**A**: Un-tagged fields get skipped / ignored.
|
221
|
+
|
222
|
+
|
223
|
+
### Q: How to deal with duplicate / repeated fields (e.g. `#sex+#targeted,#sex+#targeted`)?
|
224
|
+
|
225
|
+
**A**: Repeated fields (auto-magically) get turned into an array / list.
|
226
|
+
|
227
|
+
|
16
228
|
|
17
229
|
|
18
230
|
## License
|
data/lib/csvhuman.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'pp'
|
4
|
-
require 'logger'
|
5
4
|
|
6
5
|
|
7
6
|
require 'csvreader'
|
8
7
|
|
9
8
|
## our own code
|
10
9
|
require 'csvhuman/version' # note: let version always go first
|
10
|
+
require 'csvhuman/tag'
|
11
|
+
require 'csvhuman/column'
|
11
12
|
require 'csvhuman/reader'
|
12
13
|
|
13
14
|
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman
|
5
|
+
|
6
|
+
|
7
|
+
class Columns
|
8
|
+
|
9
|
+
|
10
|
+
def self.build( values )
|
11
|
+
|
12
|
+
## "clean" unify/normalize names
|
13
|
+
tag_keys = values.map do |value|
|
14
|
+
if value
|
15
|
+
if value.empty?
|
16
|
+
nil
|
17
|
+
else
|
18
|
+
## e.g. #ADM1 CODE => #adm1 +code
|
19
|
+
## POPULATION F CHILDREN AFFECTED => #population +affected +children +f
|
20
|
+
value = Tag.normalize( value )
|
21
|
+
## turn empty normalized tags (e.g. "stray" hashtag) into nil too
|
22
|
+
value = nil if value.empty?
|
23
|
+
value
|
24
|
+
end
|
25
|
+
else # keep (nil) as is
|
26
|
+
nil
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
counts = {}
|
32
|
+
tag_keys.each_with_index do |key,i|
|
33
|
+
if key
|
34
|
+
counts[key] ||= []
|
35
|
+
counts[key] << i
|
36
|
+
end
|
37
|
+
end
|
38
|
+
## puts "counts:"
|
39
|
+
## pp counts
|
40
|
+
|
41
|
+
## create all unique tags
|
42
|
+
tags = {}
|
43
|
+
counts.each_key do |key|
|
44
|
+
tags[key] = Tag.parse( key )
|
45
|
+
end
|
46
|
+
## puts "tags:"
|
47
|
+
## pp tags
|
48
|
+
|
49
|
+
|
50
|
+
cols = []
|
51
|
+
tag_keys.each do |key|
|
52
|
+
if key
|
53
|
+
count = counts[key]
|
54
|
+
tag = tags[key] ## note: "reuse" tag for all columns if list
|
55
|
+
if count.size > 1
|
56
|
+
## note: defaults to use "standard/default" tag key (as a string)
|
57
|
+
cols << Column.new( tag.key, tag, list: true )
|
58
|
+
else
|
59
|
+
cols << Column.new( tag.key, tag )
|
60
|
+
end
|
61
|
+
else
|
62
|
+
cols << Column.new
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
cols
|
67
|
+
end
|
68
|
+
end ## class Columns
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
class Column
|
74
|
+
attr_reader :key # used for record (record key); note: list columns must use the same key
|
75
|
+
attr_reader :tag
|
76
|
+
|
77
|
+
|
78
|
+
def initialize( key=nil, tag=nil, list: false )
|
79
|
+
@key = key
|
80
|
+
@tag = tag
|
81
|
+
@list = list
|
82
|
+
end
|
83
|
+
|
84
|
+
|
85
|
+
def tagged?() @tag.nil? == false; end
|
86
|
+
def list?() @list; end
|
87
|
+
end # class Column
|
88
|
+
|
89
|
+
end # class CsvHuman
|
data/lib/csvhuman/reader.rb
CHANGED
@@ -65,21 +65,6 @@ class CsvHuman
|
|
65
65
|
|
66
66
|
|
67
67
|
|
68
|
-
|
69
|
-
class Column
|
70
|
-
attr_reader :tag
|
71
|
-
|
72
|
-
def initialize( tag=nil, list: false )
|
73
|
-
@tag = tag
|
74
|
-
@list = list
|
75
|
-
end
|
76
|
-
|
77
|
-
def tagged?() @tag.nil? == false; end
|
78
|
-
def list?() @list; end
|
79
|
-
end # class Column
|
80
|
-
|
81
|
-
|
82
|
-
|
83
68
|
attr_reader :header, :tags
|
84
69
|
|
85
70
|
def initialize( recs_or_stream )
|
@@ -106,8 +91,8 @@ def each( &block )
|
|
106
91
|
@recs.each do |values|
|
107
92
|
## pp values
|
108
93
|
if @cols.nil?
|
109
|
-
if values.any? { |value| value && value.start_with?('#') }
|
110
|
-
@cols =
|
94
|
+
if values.any? { |value| value && value.strip.start_with?('#') }
|
95
|
+
@cols = Columns.build( values )
|
111
96
|
@tags = values
|
112
97
|
else
|
113
98
|
@header << values
|
@@ -119,8 +104,8 @@ def each( &block )
|
|
119
104
|
record = {}
|
120
105
|
@cols.each_with_index do |col,i|
|
121
106
|
if col.tagged?
|
122
|
-
key = col.
|
123
|
-
value = values[i]
|
107
|
+
key = col.key
|
108
|
+
value = values[i] ## todo/fix: use col.tag.typecast( values[i] )
|
124
109
|
if col.list?
|
125
110
|
record[ key ] ||= []
|
126
111
|
record[ key ] << value
|
@@ -144,54 +129,4 @@ def read() to_a; end # method read
|
|
144
129
|
## add closed? and close
|
145
130
|
## if self.open used without block (user needs to close file "manually")
|
146
131
|
|
147
|
-
|
148
|
-
####
|
149
|
-
# helpers
|
150
|
-
|
151
|
-
|
152
|
-
def build_cols( values )
|
153
|
-
|
154
|
-
## "clean" unify/normalize names
|
155
|
-
values = values.map do |value|
|
156
|
-
if value
|
157
|
-
if value.empty?
|
158
|
-
nil ## make untagged fields nil
|
159
|
-
else
|
160
|
-
## todo: sort attributes by a-to-z
|
161
|
-
## strip / remove all spaces
|
162
|
-
value.strip.gsub('#','') ## remove leading # - why? why not?
|
163
|
-
end
|
164
|
-
else
|
165
|
-
value ## keep (nil) as is
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
|
-
|
170
|
-
counts = {}
|
171
|
-
values.each_with_index do |value,i|
|
172
|
-
if value
|
173
|
-
counts[value] ||= []
|
174
|
-
counts[value] << i
|
175
|
-
end
|
176
|
-
end
|
177
|
-
## pp counts
|
178
|
-
|
179
|
-
|
180
|
-
cols = []
|
181
|
-
values.each do |value|
|
182
|
-
if value
|
183
|
-
count = counts[value]
|
184
|
-
if count.size > 1
|
185
|
-
cols << Column.new( value, list: true )
|
186
|
-
else
|
187
|
-
cols << Column.new( value )
|
188
|
-
end
|
189
|
-
else
|
190
|
-
cols << Column.new
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
cols
|
195
|
-
end
|
196
|
-
|
197
132
|
end # class CsvHuman
|
data/lib/csvhuman/tag.rb
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvHuman
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
class Tag
|
8
|
+
|
9
|
+
## 1) plus (with optional hashtag and/or optional leading and trailing spaces)
|
10
|
+
## 2) hashtag (with optional leading and trailing spaces)
|
11
|
+
## 3) spaces only (not followed by plus) or
|
12
|
+
## note: plus pattern must go first (otherwise "sector + en" becomes ["sector", "", "en"])
|
13
|
+
SEP_REGEX = /(?: \s*\++
|
14
|
+
(?:\s*\#+)?
|
15
|
+
\s* )
|
16
|
+
|
|
17
|
+
(?: \s*\#+\s* )
|
18
|
+
|
|
19
|
+
(?: \s+)
|
20
|
+
/x ## check if \s includes space AND tab?
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
def self.split( value )
|
25
|
+
value = value.strip
|
26
|
+
value = value.downcase
|
27
|
+
while value.start_with?('#') do ## allow one or more hashes
|
28
|
+
value = value[1..-1] ## remove leading #
|
29
|
+
value = value.strip ## strip (optional) leading spaces (again)
|
30
|
+
end
|
31
|
+
## pp value
|
32
|
+
parts = value.split( SEP_REGEX )
|
33
|
+
|
34
|
+
## sort attributes a-z
|
35
|
+
if parts.size > 2
|
36
|
+
[parts[0]] + parts[1..-1].sort
|
37
|
+
else
|
38
|
+
parts
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
def self.normalize( value ) ## todo: rename to pretty or something or add alias
|
44
|
+
parts = split( value )
|
45
|
+
name = parts[0]
|
46
|
+
attributes = parts[1..-1] ## note: might be nil
|
47
|
+
|
48
|
+
buf = ''
|
49
|
+
if name ## note: name might be nil too e.g. value = "" or value = " "
|
50
|
+
buf << '#' + name
|
51
|
+
if attributes && attributes.size > 0
|
52
|
+
buf << ' +'
|
53
|
+
buf << attributes.join(' +')
|
54
|
+
end
|
55
|
+
end
|
56
|
+
buf
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
def self.guess_type( name, attributes )
|
61
|
+
|
62
|
+
if name == 'date'
|
63
|
+
Date
|
64
|
+
elsif ['affected', 'inneed'].include?( name )
|
65
|
+
Integer
|
66
|
+
else
|
67
|
+
## check attributes
|
68
|
+
if attributes.nil? || attributes.empty?
|
69
|
+
String ## assume (default to) string
|
70
|
+
elsif attributes.include?( 'num' )
|
71
|
+
Integer
|
72
|
+
elsif attributes.include?( 'date' ) ### todo/check: exists +date?
|
73
|
+
Date
|
74
|
+
elsif attributes.include?( 'affected' )
|
75
|
+
Integer
|
76
|
+
else
|
77
|
+
String ## assume (default to) string
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
def self.parse( value )
|
85
|
+
parts = split( value )
|
86
|
+
|
87
|
+
name = parts[0]
|
88
|
+
attributes = parts[1..-1] ## todo/fix: check if nil (make it empty array [] always) - why? why not?
|
89
|
+
type = guess_type( name, attributes )
|
90
|
+
|
91
|
+
new( name, attributes, type )
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
attr_reader :name
|
98
|
+
attr_reader :attributes ## use attribs or something shorter - why? why not?
|
99
|
+
attr_reader :type
|
100
|
+
|
101
|
+
def initialize( name, attributes=nil, type=String )
|
102
|
+
@name = name
|
103
|
+
## sorted a-z - note: make sure attributes is [] NOT nil if empty - why? why not?
|
104
|
+
@attributes = attributes || []
|
105
|
+
@type = type ## type class (defaults to String)
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
def key
|
110
|
+
## convenience short cut for "standard/default" string key
|
111
|
+
## cache/pre-built/memoize - why? why not?
|
112
|
+
## builds:
|
113
|
+
## population+affected+children+f
|
114
|
+
|
115
|
+
buf = ''
|
116
|
+
buf << @name
|
117
|
+
if @attributes && @attributes.size > 0
|
118
|
+
buf << '+'
|
119
|
+
buf << @attributes.join('+')
|
120
|
+
end
|
121
|
+
buf
|
122
|
+
end
|
123
|
+
|
124
|
+
def to_s
|
125
|
+
## cache/pre-built/memoize - why? why not?
|
126
|
+
##
|
127
|
+
## builds
|
128
|
+
## #population +affected +children +f
|
129
|
+
|
130
|
+
buf = ''
|
131
|
+
buf << '#' + @name
|
132
|
+
if @attributes && @attributes.size > 0
|
133
|
+
buf << ' +'
|
134
|
+
buf << @attributes.join(' +')
|
135
|
+
end
|
136
|
+
buf
|
137
|
+
end
|
138
|
+
|
139
|
+
|
140
|
+
def typecast( value ) ## use convert or call - why? why not?
|
141
|
+
if @type == Integer
|
142
|
+
conv_to_i( value )
|
143
|
+
else ## assume String
|
144
|
+
# pass through as is
|
145
|
+
value
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
private
|
150
|
+
def conv_to_i( value )
|
151
|
+
if value.nil? || value.empty?
|
152
|
+
nil ## return nil - why? why not?
|
153
|
+
else
|
154
|
+
Integer( value )
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
|
159
|
+
end # class Tag
|
160
|
+
|
161
|
+
|
162
|
+
end # class CsvHuman
|
data/lib/csvhuman/version.rb
CHANGED
data/test/test_reader.rb
CHANGED
@@ -18,6 +18,26 @@ def recs
|
|
18
18
|
[ "Org A", "WASH", "Plains Province" ]]
|
19
19
|
end
|
20
20
|
|
21
|
+
def recs2
|
22
|
+
[["Organisation", "Cluster", "Province" ],
|
23
|
+
[ "ORG", "#SECTOR", "ADM1" ],
|
24
|
+
[ "Org A", "WASH", "Coastal Province" ],
|
25
|
+
[ "Org B", "Health", "Mountain Province" ],
|
26
|
+
[ "Org C", "Education", "Coastal Province" ],
|
27
|
+
[ "Org A", "WASH", "Plains Province" ]]
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
def expected_recs
|
32
|
+
[{"org"=>"Org A", "sector"=>"WASH", "adm1"=>"Coastal Province"},
|
33
|
+
{"org"=>"Org B", "sector"=>"Health", "adm1"=>"Mountain Province"},
|
34
|
+
{"org"=>"Org C", "sector"=>"Education", "adm1"=>"Coastal Province"},
|
35
|
+
{"org"=>"Org A", "sector"=>"WASH", "adm1"=>"Plains Province"}]
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
|
21
41
|
def txt
|
22
42
|
<<TXT
|
23
43
|
What,,,Who,Where,For whom,
|
@@ -38,7 +58,10 @@ def test_readme
|
|
38
58
|
end
|
39
59
|
|
40
60
|
pp csv.read
|
41
|
-
|
61
|
+
|
62
|
+
assert_equal expected_recs, CsvHuman.parse( recs )
|
63
|
+
assert_equal expected_recs, CsvHuman.parse( recs2 )
|
64
|
+
|
42
65
|
|
43
66
|
CsvHuman.parse( recs ).each do |rec|
|
44
67
|
pp rec
|
data/test/test_tags.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_tags.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestTags < MiniTest::Test
|
11
|
+
|
12
|
+
def split( value )
|
13
|
+
CsvHuman::Tag.split( value ) ## returns an array of strings (name+attributes[])
|
14
|
+
end
|
15
|
+
|
16
|
+
def normalize( value )
|
17
|
+
CsvHuman::Tag.normalize( value ) ## returns a string
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse( value )
|
21
|
+
CsvHuman::Tag.parse( value ) ## returns a Tag class
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
def test_split
|
27
|
+
assert_equal [], split( "" ) # empty
|
28
|
+
assert_equal [], split( " " ) # empty
|
29
|
+
|
30
|
+
## more empties (all matched by separator regex/pattern)
|
31
|
+
## keep as empty - why? why not?
|
32
|
+
assert_equal [], split( " # " ) # empty
|
33
|
+
assert_equal [], split( " ## " ) # empty
|
34
|
+
assert_equal [], split( " + " ) # empty
|
35
|
+
assert_equal [], split( " +++ " ) # empty
|
36
|
+
assert_equal [], split( " +++## " ) # empty
|
37
|
+
|
38
|
+
|
39
|
+
assert_equal ["sector", "en"], split( "#sector+en" )
|
40
|
+
assert_equal ["sector", "en"], split( "#SECTOR EN" )
|
41
|
+
assert_equal ["sector", "en"], split( " # SECTOR + EN " )
|
42
|
+
assert_equal ["sector", "en"], split( "SeCtOr en" )
|
43
|
+
assert_equal ["sector", "en"], split( "#sector#en" )
|
44
|
+
assert_equal ["sector", "en"], split( "#sector+#en" ) ## allow (optional) hash for attributes
|
45
|
+
assert_equal ["sector", "en"], split( "##sector#en" ) ## allow hash only for attributes
|
46
|
+
assert_equal ["sector", "en"], split( "# #sector+++ ##en" ) ## allow one or more plus or hashes (typos) for attributes
|
47
|
+
|
48
|
+
|
49
|
+
assert_equal ["adm1", "code"], split( "#ADM1 +CODE" )
|
50
|
+
assert_equal ["adm1", "code"], split( " # ADM1 + CODE" )
|
51
|
+
assert_equal ["adm1", "code"], split( "ADM1 CODE" )
|
52
|
+
|
53
|
+
## sort attributes a-to-z
|
54
|
+
assert_equal ["affected", "children", "f"], split( "#affected +f +children" )
|
55
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population +children +affected +m" )
|
56
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population+children+affected+m" )
|
57
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population+#children+#affected+#m" )
|
58
|
+
assert_equal ["population", "affected", "children", "m"], split( "#population #children #affected #m" )
|
59
|
+
assert_equal ["population", "affected", "children", "m"], split( "POPULATION CHILDREN AFFECTED M" )
|
60
|
+
end
|
61
|
+
|
62
|
+
|
63
|
+
def test_normalize
|
64
|
+
assert_equal "", normalize( "" ) # empty
|
65
|
+
assert_equal "", normalize( " " ) # empty
|
66
|
+
|
67
|
+
assert_equal "#sector +en", normalize( "#sector+en" )
|
68
|
+
assert_equal "#sector +en", normalize( "#SECTOR EN" )
|
69
|
+
assert_equal "#sector +en", normalize( " # SECTOR + EN " )
|
70
|
+
assert_equal "#sector +en", normalize( " # SECTOR # EN " )
|
71
|
+
assert_equal "#sector +en", normalize( "SeCToR en" )
|
72
|
+
|
73
|
+
assert_equal "#adm1 +code", normalize( "#ADM1 +CODE" )
|
74
|
+
assert_equal "#adm1 +code", normalize( " # ADM1 + CODE" )
|
75
|
+
assert_equal "#adm1 +code", normalize( " # ADM1 + #CODE" )
|
76
|
+
assert_equal "#adm1 +code", normalize( "ADM1 Code" )
|
77
|
+
|
78
|
+
## sort attributes a-to-z
|
79
|
+
assert_equal "#affected +children +f", normalize( "#affected +f +children" )
|
80
|
+
assert_equal "#population +affected +children +m", normalize( "#population +children +affected +m" )
|
81
|
+
assert_equal "#population +affected +children +m", normalize( "#population+children+affected+m" )
|
82
|
+
assert_equal "#population +affected +children +m", normalize( "POPULATION CHILDREN AFFECTED M" )
|
83
|
+
end
|
84
|
+
|
85
|
+
|
86
|
+
def test_parse
|
87
|
+
tag = parse( "#sector+en" )
|
88
|
+
assert_equal "#sector +en", tag.to_s
|
89
|
+
assert_equal "sector", tag.name
|
90
|
+
assert_equal ["en"], tag.attributes
|
91
|
+
assert_equal String, tag.type
|
92
|
+
|
93
|
+
assert_equal "#sector +en", parse( "#SECTOR EN" ).to_s
|
94
|
+
assert_equal "#sector +en", parse( " # SECTOR + EN " ).to_s
|
95
|
+
|
96
|
+
|
97
|
+
tag = parse( "#adm1" )
|
98
|
+
assert_equal "#adm1", tag.to_s
|
99
|
+
assert_equal "adm1", tag.name
|
100
|
+
assert_equal [], tag.attributes
|
101
|
+
assert_equal String, tag.type
|
102
|
+
|
103
|
+
assert_equal "#adm1", parse( "ADM1" ).to_s
|
104
|
+
end
|
105
|
+
|
106
|
+
end # class TestTags
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvhuman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csvreader
|
@@ -68,11 +68,14 @@ files:
|
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
70
|
- lib/csvhuman.rb
|
71
|
+
- lib/csvhuman/column.rb
|
71
72
|
- lib/csvhuman/reader.rb
|
73
|
+
- lib/csvhuman/tag.rb
|
72
74
|
- lib/csvhuman/version.rb
|
73
75
|
- test/data/test.csv
|
74
76
|
- test/helper.rb
|
75
77
|
- test/test_reader.rb
|
78
|
+
- test/test_tags.rb
|
76
79
|
homepage: https://github.com/csvreader/csvhuman
|
77
80
|
licenses:
|
78
81
|
- Public Domain
|