csvhuman 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 358d150c2a69a16f741b0dae47328857787d2dc2
4
+ data.tar.gz: fd36923138a7453510d2d26a4e2997c4475b3aea
5
+ SHA512:
6
+ metadata.gz: 1540846d223cb4bcf8dd4d2982f5cdc96b13966328f4037d84d6b1a7f8a3bee998856edcc3aaeb7d14e476f12e5a3cc5fd81d81a6aeed6a44e55c39a24cd8144
7
+ data.tar.gz: 2fbc7a4ee6f22f75ab4cdea35c5411fc311baf4185d5ebc3012ae4b99a43d301a93a70ff388ac58aaa3eec0eb630d9c5742e94e67cb583214d88dfb05fadebad
@@ -0,0 +1,3 @@
1
+ ### 0.1.0 / 2018-11-06
2
+
3
+ * Everything is new. First release
@@ -0,0 +1,10 @@
1
+ HISTORY.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/csvhuman.rb
6
+ lib/csvhuman/reader.rb
7
+ lib/csvhuman/version.rb
8
+ test/data/test.csv
9
+ test/helper.rb
10
+ test/test_reader.rb
@@ -0,0 +1,27 @@
1
+ # CSV Humanitarian eXchange Language (HXL) Parser / Reader
2
+
3
+ csvhuman library / gem - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
4
+
5
+ * home :: [github.com/csvreader/csvhuman](https://github.com/csvreader/csvhuman)
6
+ * bugs :: [github.com/csvreader/csvhuman/issues](https://github.com/csvreader/csvhuman/issues)
7
+ * gem :: [rubygems.org/gems/csvhuman](https://rubygems.org/gems/csvhuman)
8
+ * rdoc :: [rubydoc.info/gems/csvhuman](http://rubydoc.info/gems/csvhuman)
9
+ * forum :: [wwwmake](http://groups.google.com/group/wwwmake)
10
+
11
+
12
+
13
+ ## Usage
14
+
15
+ to be done
16
+
17
+
18
+ ## License
19
+
20
+ The `csvhuman` scripts are dedicated to the public domain.
21
+ Use them as you please with no restrictions whatsoever.
22
+
23
+
24
+ ## Questions? Comments?
25
+
26
+ Send them along to the [wwwmake forum](http://groups.google.com/group/wwwmake).
27
+ Thanks!
@@ -0,0 +1,30 @@
1
+ require 'hoe'
2
+ require './lib/csvhuman/version.rb'
3
+
4
## gem specification (via hoe) for the csvhuman package
Hoe.spec( 'csvhuman' ) do

  self.version     = CsvHuman::VERSION

  ## note: the description (re)uses the one-line summary as is
  self.summary     = "csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules"
  self.description = summary

  self.urls        = %w[https://github.com/csvreader/csvhuman]

  self.author      = 'Gerald Bauer'
  self.email       = 'wwwmake@googlegroups.com'

  ## note: readme and history use the .md (markdown) extension for github formatting
  self.readme_file  = 'README.md'
  self.history_file = 'HISTORY.md'

  ## runtime dependencies
  self.extra_deps  = [['csvreader', '>=1.2.1']]

  self.licenses    = ['Public Domain']

  ## extra settings passed along to the gem specification
  self.spec_extras = { required_ruby_version: '>= 2.2.2' }

end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ require 'pp'
4
+ require 'logger'
5
+
6
+
7
+ require 'csvreader'
8
+
9
+ ## our own code
10
+ require 'csvhuman/version' # note: let version always go first
11
+ require 'csvhuman/reader'
12
+
13
+
14
+
15
## alternative (shortcut) names - all point to the very same CsvHuman class
HXL     = CsvHuman
CSVHXL  = CsvHuman
CSV_HXL = CsvHuman
CsvHum  = CsvHuman



## say hello - print the version banner, but only if a debug flag is turned on
if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
  puts CsvHuman.banner
end
@@ -0,0 +1,197 @@
1
+ # encoding: utf-8
2
+
3
+
4
class CsvHuman

  ## Reader for tabular records with a hashtag (meta data) row.
  ##
  ##  - rows before the hashtag row get collected (as is) in header
  ##  - the first row with a cell starting with '#' is the hashtag row (tags)
  ##  - every row after the hashtag row is a data row and
  ##    gets turned into a hash (keyed by the normalized hashtags)
  ##
  ## Q: how to deal with un-tagged fields?
  ##   - skip / ignore
  ## Q: how to deal with duplicate fields (e.g. #sex+#targeted,#sex+#targeted)?
  ##   - value (auto-magically) turned into an array / list


  ## Open the file at path and wrap it in a reader.
  ##
  ## path and mode get passed along (unchanged) to Csv.open.
  ## With a block the reader gets passed to the block, the file gets closed
  ## on the way out (even on errors) and the block's result gets returned.
  ## Without a block the reader itself gets returned and
  ## the caller is responsible for calling close.
  def self.open( path, mode=nil, &block )   ## rename path to filename or name - why? why not?
    csv   = Csv.open( path, mode, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    human = new( csv )

    # handle blocks like Ruby's open(), not like the (old old) CSV library
    return human  unless block_given?

    begin
      block.call( human )
    ensure
      csv.close
    end
  end # method self.open


  ## Read all data records from the file at path; returns an array of hashes.
  def self.read( path )
    open( path ) { |human| human.read }
  end


  ## Pass every data record from the file at path to the block
  ## (and close the file when done).
  ## Without a block an enumerator gets returned and the file stays open.
  def self.foreach( path, &block )
    csv   = Csv.open( path, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    human = new( csv )

    if block_given?
      begin
        human.each( &block )
      ensure
        csv.close
      end
    else
      human.to_enum    ## note: caller (responsible) must close file!!!
    end
  end # method self.foreach


  ## Parse a string (or any source of rows, see new).
  ## With a block every data record gets passed to the block;
  ## without a block all data records get returned as an array.
  ## note: never closes the source - that's up to the caller
  def self.parse( str_or_readable, &block )
    human = new( str_or_readable )

    if block_given?
      human.each( &block )
    else  # slurp contents, if no block is given
      human.read
    end
  end # method self.parse



  ## Column mapping (used "internally") - built from one cell of the hashtag row.
  class Column
    attr_reader :tag

    ## tag  - normalized hashtag (or nil for un-tagged columns)
    ## list - true if the very same tag is used by more than one column
    def initialize( tag=nil, list: false )
      @tag  = tag
      @list = list
    end

    def tagged?() !@tag.nil?; end
    def list?()   @list; end
  end # class Column



  ## header - rows seen before the hashtag row (in the last / running each pass)
  ## tags   - the hashtag row as is (nil until found)
  attr_reader :header, :tags

  ## recs_or_stream - a String gets wrapped in Csv.new (using the human parser);
  ##   anything else gets used as is and
  ##   must respond to each yielding one array of values per row
  def initialize( recs_or_stream )
    ## todo/check: check if arg is a stream/enumerator - why? why not??
    if recs_or_stream.is_a?( String )
      @recs = Csv.new( recs_or_stream, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    else
      @recs = recs_or_stream
    end

    @header = []
    @tags   = nil
    @cols   = nil    ## column mappings (used "internally")
  end


  include Enumerable

  ## Pass every data record (hash) to the block.
  ## Returns an enumerator if no block is given
  ##   (before: crashed calling the missing block on the first data row).
  ## note: header, tags and the column mappings get reset on every pass
  def each( &block )
    return to_enum( :each )  unless block_given?

    @header = []
    @tags   = nil
    @cols   = nil

    @recs.each do |values|
      if @cols
        block.call( build_record( values ))
      elsif tag_row?( values )
        @cols = build_cols( values )
        @tags = values
      else
        @header << values
      end
    end
  end # method each

  ## Returns all data records as an array (of hashes).
  def read() to_a; end


  ## Close the underlying source (if it can be closed) - required
  ## if self.open gets used without a block (user needs to close the file "manually").
  ## Does nothing for sources without a close method (e.g. an array of rows).
  def close
    @recs.close   if @recs.respond_to?( :close )
  end

  ###
  ### todo/fix:
  ##   add closed?


  ####
  # helpers

  ## Turn the hashtag row into column mappings (one Column per cell).
  ##   - nil, empty, whitespace-only and '#'-only cells end up as un-tagged columns
  ##   - tags used more than once get marked as list columns
  def build_cols( values )
    ## "clean" unify/normalize names
    ##   note: strip first and check for empty last, otherwise a blank cell ('  ')
    ##         ends up as a column tagged with the empty string
    ## todo: sort attributes by a-to-z
    tags = values.map do |value|
      tag = value.to_s.strip.gsub( '#', '' )    ## remove leading # - why? why not?
      tag.empty? ? nil : tag
    end

    counts = Hash.new( 0 )
    tags.each { |tag| counts[tag] += 1  if tag }

    tags.map do |tag|
      tag ? Column.new( tag, list: counts[tag] > 1 ) : Column.new
    end
  end


  private

  ## Is this the hashtag row, that is, does any cell start with '#'?
  ##   note: leading spaces get ignored (build_cols strips them too) and
  ##         to_s makes nil and non-string cells (e.g. numbers) safe to check
  def tag_row?( values )
    values.any? { |value| value.to_s.lstrip.start_with?( '#' ) }
  end

  ## Turn a data row into a record (hash); values of un-tagged columns get skipped
  ## and values of list columns get collected into an array.
  def build_record( values )
    @cols.each_with_index.each_with_object( {} ) do |(col,i), record|
      next  unless col.tagged?

      if col.list?
        (record[ col.tag ] ||= []) << values[i]
      else
        ## add "regular" single value
        record[ col.tag ] = values[i]
      end
    end
  end

end # class CsvHuman
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+
4
## version info and (file system) root helper for the csvhuman library
class CsvHuman

  MAJOR = 0
  MINOR = 1
  PATCH = 0
  ## note: frozen - keeps callers from changing the shared version string in place
  VERSION = [MAJOR,MINOR,PATCH].join('.').freeze


  ## Returns the version string (e.g. 0.1.0).
  def self.version
    VERSION
  end

  ## Returns the one-line banner with the library version plus
  ## the version, release date and platform of the running ruby.
  def self.banner
    "csvhuman/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  end

  ## Returns the absolute path three levels up from this file
  ## (file -> its directory -> parent -> parent).
  def self.root
    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
  end

end # class CsvHuman
@@ -0,0 +1,7 @@
1
+ What,,,Who,Where,For whom,
2
+ Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
3
+ ,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
4
+ 001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
5
+ 002,Health,Subsector 2,Org 2,Country 2,,,Region 2
6
+ 003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
7
+ 004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
@@ -0,0 +1,17 @@
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+ ## minitest setup
4
+
5
+ require 'minitest/autorun'
6
+
7
+
8
+ ## our own code
9
+ require 'csvhuman'
10
+
11
+
12
+ ## add test_data_dir helper
13
class CsvHuman
  class << self
    ## path to the directory with the test data, that is, root plus /test/data
    def test_data_dir
      [root, 'test/data'].join( '/' )
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_reader.rb
6
+
7
+
8
+ require 'helper'
9
+
10
## note: uses Minitest::Test - the legacy MiniTest alias is no longer
##   loaded by default in newer minitest versions
class TestReader < Minitest::Test

  ## in-memory rows: one header row, the hashtag row and four data rows
  def recs
    [["Organisation", "Cluster",   "Province" ],
     [ "#org",        "#sector",   "#adm1" ],
     [ "Org A",       "WASH",      "Coastal Province" ],
     [ "Org B",       "Health",    "Mountain Province" ],
     [ "Org C",       "Education", "Coastal Province" ],
     [ "Org A",       "WASH",      "Plains Province" ]]
  end

  ## the data records expected for recs (keys are the hashtags with the # removed)
  def expected_recs
    [{ "org" => "Org A", "sector" => "WASH",      "adm1" => "Coastal Province" },
     { "org" => "Org B", "sector" => "Health",    "adm1" => "Mountain Province" },
     { "org" => "Org C", "sector" => "Education", "adm1" => "Coastal Province" },
     { "org" => "Org A", "sector" => "WASH",      "adm1" => "Plains Province" }]
  end

  ## same content as test/data/test.csv - two header rows, the hashtag row
  ##   (with an un-tagged first column and a duplicate tag) and four data rows
  def txt
    <<TXT
What,,,Who,Where,For whom,
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
TXT
  end


  def test_readme
    csv = CsvHuman.new( recs )
    csv.each do |rec|
      pp rec
    end

    ## in-memory rows do not pass through the csv parser,
    ##   thus, the result is known (and gets checked)
    assert_equal expected_recs, csv.read
    assert_equal [["Organisation", "Cluster", "Province"]], csv.header
    assert_equal ["#org", "#sector", "#adm1"], csv.tags
    assert_equal expected_recs, CsvHuman.parse( recs )

    pp csv.read
    pp CsvHuman.parse( recs )

    CsvHuman.parse( recs ).each do |rec|
      pp rec
    end


    ## file and text input pass through the csv parser - print only (smoke test)
    pp CsvHuman.read( "#{CsvHuman.test_data_dir}/test.csv" )
    pp CsvHuman.parse( txt )
    CsvHuman.parse( txt ).each do |rec|
      pp rec
    end

    CsvHuman.foreach( "#{CsvHuman.test_data_dir}/test.csv" ) do |rec|
      pp rec
    end
  end

end # class TestReader
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csvhuman
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csvreader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.2.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '4.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: hoe
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.16'
55
+ description: csvhuman - read tabular data in the CSV Humanitarian eXchange Language
56
+ (HXL) format, that is, comma-separated values (CSV) line-by-line records with a
57
+ hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
58
+ email: wwwmake@googlegroups.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files:
62
+ - HISTORY.md
63
+ - Manifest.txt
64
+ - README.md
65
+ files:
66
+ - HISTORY.md
67
+ - Manifest.txt
68
+ - README.md
69
+ - Rakefile
70
+ - lib/csvhuman.rb
71
+ - lib/csvhuman/reader.rb
72
+ - lib/csvhuman/version.rb
73
+ - test/data/test.csv
74
+ - test/helper.rb
75
+ - test/test_reader.rb
76
+ homepage: https://github.com/csvreader/csvhuman
77
+ licenses:
78
+ - Public Domain
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options:
82
+ - "--main"
83
+ - README.md
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 2.2.2
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.5.2
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format,
102
+ that is, comma-separated values (CSV) line-by-line records with a hashtag (meta
103
+ data) line using the Humanitarian eXchange Language (HXL) rules
104
+ test_files: []