csvhuman 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.md +3 -0
- data/Manifest.txt +10 -0
- data/README.md +27 -0
- data/Rakefile +30 -0
- data/lib/csvhuman.rb +24 -0
- data/lib/csvhuman/reader.rb +197 -0
- data/lib/csvhuman/version.rb +24 -0
- data/test/data/test.csv +7 -0
- data/test/helper.rb +17 -0
- data/test/test_reader.rb +59 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 358d150c2a69a16f741b0dae47328857787d2dc2
|
4
|
+
data.tar.gz: fd36923138a7453510d2d26a4e2997c4475b3aea
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1540846d223cb4bcf8dd4d2982f5cdc96b13966328f4037d84d6b1a7f8a3bee998856edcc3aaeb7d14e476f12e5a3cc5fd81d81a6aeed6a44e55c39a24cd8144
|
7
|
+
data.tar.gz: 2fbc7a4ee6f22f75ab4cdea35c5411fc311baf4185d5ebc3012ae4b99a43d301a93a70ff388ac58aaa3eec0eb630d9c5742e94e67cb583214d88dfb05fadebad
|
data/HISTORY.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# CSV Humanitarian eXchange Language (HXL) Parser / Reader
|
2
|
+
|
3
|
+
csvhuman library / gem - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
|
4
|
+
|
5
|
+
* home :: [github.com/csvreader/csvhuman](https://github.com/csvreader/csvhuman)
|
6
|
+
* bugs :: [github.com/csvreader/csvhuman/issues](https://github.com/csvreader/csvhuman/issues)
|
7
|
+
* gem :: [rubygems.org/gems/csvhuman](https://rubygems.org/gems/csvhuman)
|
8
|
+
* rdoc :: [rubydoc.info/gems/csvhuman](http://rubydoc.info/gems/csvhuman)
|
9
|
+
* forum :: [wwwmake](http://groups.google.com/group/wwwmake)
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
to be done
|
16
|
+
|
17
|
+
|
18
|
+
## License
|
19
|
+
|
20
|
+
The `csvhuman` scripts are dedicated to the public domain.
|
21
|
+
Use it as you please with no restrictions whatsoever.
|
22
|
+
|
23
|
+
|
24
|
+
## Questions? Comments?
|
25
|
+
|
26
|
+
Send them along to the [wwwmake forum](http://groups.google.com/group/wwwmake).
|
27
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/csvhuman/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'csvhuman' do
|
5
|
+
|
6
|
+
self.version = CsvHuman::VERSION
|
7
|
+
|
8
|
+
self.summary = "csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules"
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = ['https://github.com/csvreader/csvhuman']
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'wwwmake@googlegroups.com'
|
15
|
+
|
16
|
+
# switch extension to .markdown for github formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'HISTORY.md'
|
19
|
+
|
20
|
+
self.extra_deps = [
|
21
|
+
['csvreader', '>=1.2.1']
|
22
|
+
]
|
23
|
+
|
24
|
+
self.licenses = ['Public Domain']
|
25
|
+
|
26
|
+
self.spec_extras = {
|
27
|
+
required_ruby_version: '>= 2.2.2'
|
28
|
+
}
|
29
|
+
|
30
|
+
end
|
data/lib/csvhuman.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
|
7
|
+
require 'csvreader'
|
8
|
+
|
9
|
+
## our own code
|
10
|
+
require 'csvhuman/version' # note: let version always go first
|
11
|
+
require 'csvhuman/reader'
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
## add some "alternative" shortcut aliases
|
16
|
+
CsvHum = CsvHuman
|
17
|
+
CSV_HXL = CsvHuman
|
18
|
+
CSVHXL = CsvHuman
|
19
|
+
HXL = CsvHuman
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
# say hello
|
24
|
+
puts CsvHuman.banner if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman
|
5
|
+
|
6
|
+
## Q: how to deal with un-tagged fields?
|
7
|
+
## - skip / ignore
|
8
|
+
## Q: how to deal with duplicate fields (e.g. #sex+#targeted,#sex+#targeted)?
|
9
|
+
## - value (auto-magically) turned into an array / list
|
10
|
+
|
11
|
+
|
12
|
+
def self.open( path, mode=nil, &block ) ## rename path to filename or name - why? why not?
|
13
|
+
|
14
|
+
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
15
|
+
## f = File.open( path, mode ? mode : 'r:bom|utf-8' )
|
16
|
+
csv = Csv.open( path, mode, parser: Csv::Parser.human ) ## note: returns an enumarator-like object
|
17
|
+
human = new( csv )
|
18
|
+
|
19
|
+
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
20
|
+
if block_given?
|
21
|
+
begin
|
22
|
+
block.call( human )
|
23
|
+
ensure
|
24
|
+
csv.close
|
25
|
+
end
|
26
|
+
else
|
27
|
+
human
|
28
|
+
end
|
29
|
+
end # method self.open
|
30
|
+
|
31
|
+
|
32
|
+
def self.read( path )
|
33
|
+
open( path ) { |human| human.read }
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
def self.foreach( path, &block )
|
38
|
+
csv = Csv.open( path, parser: Csv::Parser.human ) ## note: returns an enumarator-like object
|
39
|
+
human = new( csv )
|
40
|
+
|
41
|
+
if block_given?
|
42
|
+
begin
|
43
|
+
human.each( &block )
|
44
|
+
ensure
|
45
|
+
csv.close
|
46
|
+
end
|
47
|
+
else
|
48
|
+
human.to_enum ## note: caller (responsible) must close file!!!
|
49
|
+
## remove version without block given - why? why not?
|
50
|
+
## use Csv.open().to_enum or Csv.open().each
|
51
|
+
## or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
|
52
|
+
end
|
53
|
+
end # method self.foreach
|
54
|
+
|
55
|
+
|
56
|
+
def self.parse( str_or_readable, &block )
|
57
|
+
human = new( str_or_readable )
|
58
|
+
|
59
|
+
if block_given?
|
60
|
+
human.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
61
|
+
else # slurp contents, if no block is given
|
62
|
+
human.read ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
63
|
+
end
|
64
|
+
end # method self.parse
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
class Column
|
70
|
+
attr_reader :tag
|
71
|
+
|
72
|
+
def initialize( tag=nil, list: false )
|
73
|
+
@tag = tag
|
74
|
+
@list = list
|
75
|
+
end
|
76
|
+
|
77
|
+
def tagged?() @tag.nil? == false; end
|
78
|
+
def list?() @list; end
|
79
|
+
end # class Column
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
attr_reader :header, :tags
|
84
|
+
|
85
|
+
def initialize( recs_or_stream )
|
86
|
+
## todo/check: check if arg is a stream/enumerator - why? why not??
|
87
|
+
if recs_or_stream.is_a?( String )
|
88
|
+
@recs = Csv.new( recs_or_stream, parser: Csv::Parser.human ) ## note: returns an enumarator-like object
|
89
|
+
else
|
90
|
+
@recs = recs_or_stream
|
91
|
+
end
|
92
|
+
|
93
|
+
@header = []
|
94
|
+
@tags = nil ## add tags = nil -- why? why not?
|
95
|
+
@cols = nil ## column mappings (used "internally")
|
96
|
+
end
|
97
|
+
|
98
|
+
|
99
|
+
include Enumerable
|
100
|
+
|
101
|
+
def each( &block )
|
102
|
+
@header = []
|
103
|
+
@tags = nil ## add tags = nil -- why? why not?
|
104
|
+
@cols = nil ## column mappings (used "internally")
|
105
|
+
|
106
|
+
@recs.each do |values|
|
107
|
+
## pp values
|
108
|
+
if @cols.nil?
|
109
|
+
if values.any? { |value| value && value.start_with?('#') }
|
110
|
+
@cols = build_cols( values )
|
111
|
+
@tags = values
|
112
|
+
else
|
113
|
+
@header << values
|
114
|
+
end
|
115
|
+
else
|
116
|
+
|
117
|
+
## data row
|
118
|
+
## strip non-tagged - how?
|
119
|
+
record = {}
|
120
|
+
@cols.each_with_index do |col,i|
|
121
|
+
if col.tagged?
|
122
|
+
key = col.tag
|
123
|
+
value = values[i]
|
124
|
+
if col.list?
|
125
|
+
record[ key ] ||= []
|
126
|
+
record[ key ] << value
|
127
|
+
else
|
128
|
+
## add "regular" single value
|
129
|
+
record[ key ] = value
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
## pp record
|
134
|
+
block.call( record )
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end # method each
|
138
|
+
|
139
|
+
def read() to_a; end # method read
|
140
|
+
|
141
|
+
|
142
|
+
###
|
143
|
+
### todo/fix:
|
144
|
+
## add closed? and close
|
145
|
+
## if self.open used without block (user needs to close file "manually")
|
146
|
+
|
147
|
+
|
148
|
+
####
|
149
|
+
# helpers
|
150
|
+
|
151
|
+
|
152
|
+
def build_cols( values )
|
153
|
+
|
154
|
+
## "clean" unify/normalize names
|
155
|
+
values = values.map do |value|
|
156
|
+
if value
|
157
|
+
if value.empty?
|
158
|
+
nil ## make untagged fields nil
|
159
|
+
else
|
160
|
+
## todo: sort attributes by a-to-z
|
161
|
+
## strip / remove all spaces
|
162
|
+
value.strip.gsub('#','') ## remove leading # - why? why not?
|
163
|
+
end
|
164
|
+
else
|
165
|
+
value ## keep (nil) as is
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
|
170
|
+
counts = {}
|
171
|
+
values.each_with_index do |value,i|
|
172
|
+
if value
|
173
|
+
counts[value] ||= []
|
174
|
+
counts[value] << i
|
175
|
+
end
|
176
|
+
end
|
177
|
+
## pp counts
|
178
|
+
|
179
|
+
|
180
|
+
cols = []
|
181
|
+
values.each do |value|
|
182
|
+
if value
|
183
|
+
count = counts[value]
|
184
|
+
if count.size > 1
|
185
|
+
cols << Column.new( value, list: true )
|
186
|
+
else
|
187
|
+
cols << Column.new( value )
|
188
|
+
end
|
189
|
+
else
|
190
|
+
cols << Column.new
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
cols
|
195
|
+
end
|
196
|
+
|
197
|
+
end # class CsvHuman
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvHuman
|
5
|
+
|
6
|
+
MAJOR = 0
|
7
|
+
MINOR = 1
|
8
|
+
PATCH = 0
|
9
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
10
|
+
|
11
|
+
|
12
|
+
def self.version
|
13
|
+
VERSION
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.banner
|
17
|
+
"csvhuman/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.root
|
21
|
+
"#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
|
22
|
+
end
|
23
|
+
|
24
|
+
end # module CsvHuman
|
data/test/data/test.csv
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
What,,,Who,Where,For whom,
|
2
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
3
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
4
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
5
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
6
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
7
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
data/test/helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
## minitest setup
|
4
|
+
|
5
|
+
require 'minitest/autorun'
|
6
|
+
|
7
|
+
|
8
|
+
## our own code
|
9
|
+
require 'csvhuman'
|
10
|
+
|
11
|
+
|
12
|
+
## add test_data_dir helper
|
13
|
+
class CsvHuman
|
14
|
+
def self.test_data_dir
|
15
|
+
"#{root}/test/data"
|
16
|
+
end
|
17
|
+
end
|
data/test/test_reader.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestReader < MiniTest::Test
|
11
|
+
|
12
|
+
def recs
|
13
|
+
[["Organisation", "Cluster", "Province" ],
|
14
|
+
[ "#org", "#sector", "#adm1" ],
|
15
|
+
[ "Org A", "WASH", "Coastal Province" ],
|
16
|
+
[ "Org B", "Health", "Mountain Province" ],
|
17
|
+
[ "Org C", "Education", "Coastal Province" ],
|
18
|
+
[ "Org A", "WASH", "Plains Province" ]]
|
19
|
+
end
|
20
|
+
|
21
|
+
def txt
|
22
|
+
<<TXT
|
23
|
+
What,,,Who,Where,For whom,
|
24
|
+
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
|
25
|
+
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
26
|
+
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
|
27
|
+
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
|
28
|
+
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
|
29
|
+
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
|
30
|
+
TXT
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
def test_readme
|
35
|
+
csv = CsvHuman.new( recs )
|
36
|
+
csv.each do |rec|
|
37
|
+
pp rec
|
38
|
+
end
|
39
|
+
|
40
|
+
pp csv.read
|
41
|
+
pp CsvHuman.parse( recs )
|
42
|
+
|
43
|
+
CsvHuman.parse( recs ).each do |rec|
|
44
|
+
pp rec
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
pp CsvHuman.read( "#{CsvHuman.test_data_dir}/test.csv" )
|
49
|
+
pp CsvHuman.parse( txt )
|
50
|
+
CsvHuman.parse( txt ).each do |rec|
|
51
|
+
pp rec
|
52
|
+
end
|
53
|
+
|
54
|
+
CsvHuman.foreach( "#{CsvHuman.test_data_dir}/test.csv" ) do |rec|
|
55
|
+
pp rec
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end # class TestReader
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csvhuman
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-11-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: csvreader
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.2.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.2.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rdoc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '4.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hoe
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.16'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.16'
|
55
|
+
description: csvhuman - read tabular data in the CSV Humanitarian eXchange Language
|
56
|
+
(HXL) format, that is, comma-separated values (CSV) line-by-line records with a
|
57
|
+
hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
|
58
|
+
email: wwwmake@googlegroups.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files:
|
62
|
+
- HISTORY.md
|
63
|
+
- Manifest.txt
|
64
|
+
- README.md
|
65
|
+
files:
|
66
|
+
- HISTORY.md
|
67
|
+
- Manifest.txt
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- lib/csvhuman.rb
|
71
|
+
- lib/csvhuman/reader.rb
|
72
|
+
- lib/csvhuman/version.rb
|
73
|
+
- test/data/test.csv
|
74
|
+
- test/helper.rb
|
75
|
+
- test/test_reader.rb
|
76
|
+
homepage: https://github.com/csvreader/csvhuman
|
77
|
+
licenses:
|
78
|
+
- Public Domain
|
79
|
+
metadata: {}
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options:
|
82
|
+
- "--main"
|
83
|
+
- README.md
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: 2.2.2
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 2.5.2
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format,
|
102
|
+
that is, comma-separated values (CSV) line-by-line records with a hashtag (meta
|
103
|
+
data) line using the Humanitarian eXchange Language (HXL) rules
|
104
|
+
test_files: []
|