csvhuman 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/HISTORY.md +3 -0
- data/Manifest.txt +10 -0
- data/README.md +27 -0
- data/Rakefile +30 -0
- data/lib/csvhuman.rb +24 -0
- data/lib/csvhuman/reader.rb +197 -0
- data/lib/csvhuman/version.rb +24 -0
- data/test/data/test.csv +7 -0
- data/test/helper.rb +17 -0
- data/test/test_reader.rb +59 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 358d150c2a69a16f741b0dae47328857787d2dc2
|
4
|
+
data.tar.gz: fd36923138a7453510d2d26a4e2997c4475b3aea
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1540846d223cb4bcf8dd4d2982f5cdc96b13966328f4037d84d6b1a7f8a3bee998856edcc3aaeb7d14e476f12e5a3cc5fd81d81a6aeed6a44e55c39a24cd8144
|
7
|
+
data.tar.gz: 2fbc7a4ee6f22f75ab4cdea35c5411fc311baf4185d5ebc3012ae4b99a43d301a93a70ff388ac58aaa3eec0eb630d9c5742e94e67cb583214d88dfb05fadebad
|
data/HISTORY.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# CSV Humanitarian eXchange Language (HXL) Parser / Reader
|
2
|
+
|
3
|
+
csvhuman library / gem - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
|
4
|
+
|
5
|
+
* home :: [github.com/csvreader/csvhuman](https://github.com/csvreader/csvhuman)
|
6
|
+
* bugs :: [github.com/csvreader/csvhuman/issues](https://github.com/csvreader/csvhuman/issues)
|
7
|
+
* gem :: [rubygems.org/gems/csvhuman](https://rubygems.org/gems/csvhuman)
|
8
|
+
* rdoc :: [rubydoc.info/gems/csvhuman](http://rubydoc.info/gems/csvhuman)
|
9
|
+
* forum :: [wwwmake](http://groups.google.com/group/wwwmake)
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
to be done
|
16
|
+
|
17
|
+
|
18
|
+
## License
|
19
|
+
|
20
|
+
The `csvhuman` scripts are dedicated to the public domain.
|
21
|
+
Use it as you please with no restrictions whatsoever.
|
22
|
+
|
23
|
+
|
24
|
+
## Questions? Comments?
|
25
|
+
|
26
|
+
Send them along to the [wwwmake forum](http://groups.google.com/group/wwwmake).
|
27
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/csvhuman/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'csvhuman' do
|
5
|
+
|
6
|
+
self.version = CsvHuman::VERSION
|
7
|
+
|
8
|
+
self.summary = "csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules"
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = ['https://github.com/csvreader/csvhuman']
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'wwwmake@googlegroups.com'
|
15
|
+
|
16
|
+
# switch extension to .markdown for GitHub formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'HISTORY.md'
|
19
|
+
|
20
|
+
self.extra_deps = [
|
21
|
+
['csvreader', '>=1.2.1']
|
22
|
+
]
|
23
|
+
|
24
|
+
self.licenses = ['Public Domain']
|
25
|
+
|
26
|
+
self.spec_extras = {
|
27
|
+
required_ruby_version: '>= 2.2.2'
|
28
|
+
}
|
29
|
+
|
30
|
+
end
|
data/lib/csvhuman.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
|
7
|
+
require 'csvreader'
|
8
|
+
|
9
|
+
## our own code
|
10
|
+
require 'csvhuman/version' # note: let version always go first
|
11
|
+
require 'csvhuman/reader'
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
## add some "alternative" shortcut aliases
|
16
|
+
CsvHum = CsvHuman
|
17
|
+
CSV_HXL = CsvHuman
|
18
|
+
CSVHXL = CsvHuman
|
19
|
+
HXL = CsvHuman
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
# say hello
|
24
|
+
puts CsvHuman.banner if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman

  ## Reader for tabular data with a hashtag (meta data) line.
  ##
  ## Rows before the hashtag line are collected in #header, the hashtag line
  ## itself is kept (as is) in #tags, and every row after it is turned into
  ## a hash keyed by the normalized tag.
  ##
  ## Q: how to deal with un-tagged fields?
  ##   - skip / ignore
  ## Q: how to deal with duplicate fields (e.g. #sex+#targeted,#sex+#targeted)?
  ##   - value (auto-magically) turned into an array / list


  ## Opens the file at path and wraps it in a CsvHuman reader.
  ##
  ## With a block: yields the reader, always closes the underlying csv
  ## and returns the block's result.
  ## Without a block: returns the reader - the caller must close the file.
  def self.open( path, mode=nil, &block )   ## rename path to filename or name - why? why not?

    ## note: mode gets passed along as is;
    ##   original note says a nil mode defaults to 'r:bom|utf-8'
    ##   (done inside Csv.open - todo/check: confirm in csvreader)
    csv   = Csv.open( path, mode, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    human = new( csv )

    # handle blocks like Ruby's open(), not like the (old old) CSV library
    if block_given?
      begin
        block.call( human )
      ensure
        csv.close
      end
    else
      human
    end
  end # method self.open


  ## Reads the whole file at path and returns all records (array of hashes).
  def self.read( path )
    open( path ) { |human| human.read }
  end


  ## Calls the block once for every record of the file at path and
  ## closes the file afterwards; without a block returns an enumerator.
  def self.foreach( path, &block )
    csv   = Csv.open( path, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    human = new( csv )

    if block_given?
      begin
        human.each( &block )
      ensure
        csv.close
      end
    else
      human.to_enum    ## note: caller (responsible) must close file!!!
      ## remove version without block given - why? why not?
      ## use Csv.open().to_enum or Csv.open().each
      ## or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
    end
  end # method self.foreach


  ## Parses a string or an (already parsed) enumerable of rows.
  ##
  ## With a block: calls it once for every record.
  ## Without a block: slurps and returns all records (array of hashes).
  def self.parse( str_or_readable, &block )
    human = new( str_or_readable )

    if block_given?
      human.each( &block )  ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
    else  # slurp contents, if no block is given
      human.read            ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
    end
  end # method self.parse



  ## Column mapping: the normalized tag (nil if un-tagged) plus a flag
  ## telling if the tag is used by more than one column (list).
  class Column
    attr_reader :tag

    def initialize( tag=nil, list: false )
      @tag  = tag
      @list = list
    end

    def tagged?() !@tag.nil?; end
    def list?()   @list; end
  end # class Column



  ## header - all rows seen before the hashtag line (array of arrays)
  ## tags   - the hashtag line as read (nil until seen)
  attr_reader :header, :tags

  ## recs_or_stream - a String (parsed with Csv and the "human" parser) or
  ##   any object responding to each and yielding rows (arrays of values)
  def initialize( recs_or_stream )
    ## todo/check: check if arg is a stream/enumerator - why? why not??
    if recs_or_stream.is_a?( String )
      @recs = Csv.new( recs_or_stream, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    else
      @recs = recs_or_stream
    end

    @header = []
    @tags   = nil   ## add tags = nil  -- why? why not?
    @cols   = nil   ## column mappings (used "internally")
  end


  include Enumerable

  ## Calls the block once for every data record (hash of tag => value).
  ## Resets header, tags and column mappings on every run.
  ## Without a block returns an enumerator
  ##   (before: failed calling nil on the first data row).
  def each( &block )
    return to_enum( :each )  unless block_given?

    @header = []
    @tags   = nil   ## add tags = nil  -- why? why not?
    @cols   = nil   ## column mappings (used "internally")

    @recs.each do |values|
      ## pp values
      if @cols.nil?
        ## still looking for the hashtag line - first row with a value starting with #
        if values.any? { |value| value && value.start_with?('#') }
          @cols = build_cols( values )
          @tags = values
        else
          @header << values
        end
      else
        ## data row
        block.call( build_record( values ) )
      end
    end
  end # method each

  def read() to_a; end # method read


  ###
  ### todo/fix:
  ##   add closed? and close
  ##   if self.open used without block (user needs to close file "manually")


  ####
  # helpers

  ## Builds the column mappings (array of Column) for the hashtag line.
  ##
  ##  - nil, empty or blank values (and values left empty after
  ##    removing the #) result in un-tagged columns
  ##  - tags used more than once result in list columns
  def build_cols( values )

    ## "clean" unify/normalize names
    tags = values.map do |value|
      if value
        ## todo: sort attributes by a-to-z
        ## todo: strip / remove all (inner) spaces too - why? why not?
        tag = value.strip.delete('#')    ## remove all # - why? why not?
        ## note: check for empty AFTER cleanup
        ##   otherwise blank values end up as tag with empty name ("")
        tag.empty? ? nil : tag           ## make untagged fields nil
      else
        value    ## keep (nil) as is
      end
    end

    ## count how often every tag is used
    counts = Hash.new( 0 )
    tags.each { |tag| counts[tag] += 1   if tag }
    ## pp counts

    tags.map do |tag|
      if tag
        Column.new( tag, list: counts[tag] > 1 )
      else
        Column.new
      end
    end
  end


  private

  ## Builds the record (hash) for a data row;
  ## un-tagged columns get skipped and list columns collect values in an array.
  def build_record( values )
    record = {}
    @cols.each_with_index do |col,i|
      next  unless col.tagged?

      key   = col.tag
      value = values[i]
      if col.list?
        record[ key ] ||= []
        record[ key ] << value
      else
        ## add "regular" single value
        record[ key ] = value
      end
    end
    ## pp record
    record
  end

end # class CsvHuman
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman

  ## version parts - joined with dots into the version string
  MAJOR = 0
  MINOR = 1
  PATCH = 0
  VERSION = [MAJOR,MINOR,PATCH].join('.')


  ## returns the version string (e.g. 0.1.0)
  def self.version
    VERSION
  end

  ## returns a one-line banner with the library version plus
  ## the running Ruby version, release date and platform
  def self.root_levels_note() nil; end   if false   ## (never defined - placeholder kept out of the api)

  def self.banner
    "csvhuman/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  end

  ## returns the absolute path three directory levels up from this file
  ##  (that is, the gem root for lib/csvhuman/version.rb)
  def self.root
    ## note: File.expand_path returns a string already - no interpolation needed
    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
  end

end # class CsvHuman
|
data/test/data/test.csv
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
What,,,Who,Where,For whom,
|
2
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
3
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
4
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
5
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
6
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
7
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
data/test/helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
## minitest setup
|
4
|
+
|
5
|
+
require 'minitest/autorun'
|
6
|
+
|
7
|
+
|
8
|
+
## our own code
|
9
|
+
require 'csvhuman'
|
10
|
+
|
11
|
+
|
12
|
+
## add test_data_dir helper
|
13
|
+
class CsvHuman
  ## returns the directory with the test fixtures, that is, <root>/test/data
  ##   note: uses File.join instead of building the path by hand
  def self.test_data_dir
    File.join( root, 'test', 'data' )
  end
end
|
data/test/test_reader.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
## note: use Minitest::Test (not the legacy MiniTest alias
##   that is no longer defined by default in newer minitest versions)
class TestReader < Minitest::Test

  ## already parsed rows: header line, hashtag line and four data rows
  def recs
    [["Organisation", "Cluster", "Province" ],
     [ "#org", "#sector", "#adm1" ],
     [ "Org A", "WASH", "Coastal Province" ],
     [ "Org B", "Health", "Mountain Province" ],
     [ "Org C", "Education", "Coastal Province" ],
     [ "Org A", "WASH", "Plains Province" ]]
  end

  ## same content as test/data/test.csv - two header lines, hashtag line
  ##   (with an un-tagged first column and a duplicate tag) and four data rows
  def txt
    <<TXT
What,,,Who,Where,For whom,
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
TXT
  end


  def test_readme
    csv = CsvHuman.new( recs )
    csv.each do |rec|
      pp rec
    end

    pp csv.read
    pp CsvHuman.parse( recs )

    ## check results for the in-memory rows
    rows = csv.read
    assert_equal 4, rows.size
    assert_equal( { 'org' => 'Org A', 'sector' => 'WASH', 'adm1' => 'Coastal Province' }, rows.first )
    assert_equal [['Organisation', 'Cluster', 'Province']], csv.header
    assert_equal ['#org', '#sector', '#adm1'], csv.tags
    assert_equal rows, CsvHuman.parse( recs )

    CsvHuman.parse( recs ).each do |rec|
      pp rec
    end


    pp CsvHuman.read( "#{CsvHuman.test_data_dir}/test.csv" )
    pp CsvHuman.parse( txt )
    CsvHuman.parse( txt ).each do |rec|
      pp rec
    end

    CsvHuman.foreach( "#{CsvHuman.test_data_dir}/test.csv" ) do |rec|
      pp rec
    end
  end

end # class TestReader
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csvhuman
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-11-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: csvreader
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.2.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.2.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rdoc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '4.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hoe
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.16'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.16'
|
55
|
+
description: csvhuman - read tabular data in the CSV Humanitarian eXchange Language
|
56
|
+
(HXL) format, that is, comma-separated values (CSV) line-by-line records with a
|
57
|
+
hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
|
58
|
+
email: wwwmake@googlegroups.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files:
|
62
|
+
- HISTORY.md
|
63
|
+
- Manifest.txt
|
64
|
+
- README.md
|
65
|
+
files:
|
66
|
+
- HISTORY.md
|
67
|
+
- Manifest.txt
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- lib/csvhuman.rb
|
71
|
+
- lib/csvhuman/reader.rb
|
72
|
+
- lib/csvhuman/version.rb
|
73
|
+
- test/data/test.csv
|
74
|
+
- test/helper.rb
|
75
|
+
- test/test_reader.rb
|
76
|
+
homepage: https://github.com/csvreader/csvhuman
|
77
|
+
licenses:
|
78
|
+
- Public Domain
|
79
|
+
metadata: {}
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options:
|
82
|
+
- "--main"
|
83
|
+
- README.md
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: 2.2.2
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 2.5.2
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format,
|
102
|
+
that is, comma-separated values (CSV) line-by-line records with a hashtag (meta
|
103
|
+
data) line using the Humanitarian eXchange Language (HXL) rules
|
104
|
+
test_files: []
|