csvhuman 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 358d150c2a69a16f741b0dae47328857787d2dc2
4
+ data.tar.gz: fd36923138a7453510d2d26a4e2997c4475b3aea
5
+ SHA512:
6
+ metadata.gz: 1540846d223cb4bcf8dd4d2982f5cdc96b13966328f4037d84d6b1a7f8a3bee998856edcc3aaeb7d14e476f12e5a3cc5fd81d81a6aeed6a44e55c39a24cd8144
7
+ data.tar.gz: 2fbc7a4ee6f22f75ab4cdea35c5411fc311baf4185d5ebc3012ae4b99a43d301a93a70ff388ac58aaa3eec0eb630d9c5742e94e67cb583214d88dfb05fadebad
@@ -0,0 +1,3 @@
1
+ ### 0.0.1 / 2018-11-06
2
+
3
+ * Everything is new. First release
@@ -0,0 +1,10 @@
1
+ HISTORY.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/csvhuman.rb
6
+ lib/csvhuman/reader.rb
7
+ lib/csvhuman/version.rb
8
+ test/data/test.csv
9
+ test/helper.rb
10
+ test/test_reader.rb
@@ -0,0 +1,27 @@
1
+ # CSV Humanitarian eXchange Language (HXL) Parser / Reader
2
+
3
+ csvhuman library / gem - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
4
+
5
+ * home :: [github.com/csvreader/csvhuman](https://github.com/csvreader/csvhuman)
6
+ * bugs :: [github.com/csvreader/csvhuman/issues](https://github.com/csvreader/csvhuman/issues)
7
+ * gem :: [rubygems.org/gems/csvhuman](https://rubygems.org/gems/csvhuman)
8
+ * rdoc :: [rubydoc.info/gems/csvhuman](http://rubydoc.info/gems/csvhuman)
9
+ * forum :: [wwwmake](http://groups.google.com/group/wwwmake)
10
+
11
+
12
+
13
+ ## Usage
14
+
15
+ to be done
16
+
17
+
18
+ ## License
19
+
20
+ The `csvhuman` scripts are dedicated to the public domain.
21
+ Use them as you please with no restrictions whatsoever.
22
+
23
+
24
+ ## Questions? Comments?
25
+
26
+ Send them along to the [wwwmake forum](http://groups.google.com/group/wwwmake).
27
+ Thanks!
@@ -0,0 +1,30 @@
1
+ require 'hoe'
2
+ require './lib/csvhuman/version.rb'
3
+
4
+ Hoe.spec 'csvhuman' do
5
+
6
+ self.version = CsvHuman::VERSION
7
+
8
+ self.summary = "csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules"
9
+ self.description = summary
10
+
11
+ self.urls = ['https://github.com/csvreader/csvhuman']
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'wwwmake@googlegroups.com'
15
+
16
+ # note: the readme and history files use the .md (markdown) extension for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'HISTORY.md'
19
+
20
+ self.extra_deps = [
21
+ ['csvreader', '>=1.2.1']
22
+ ]
23
+
24
+ self.licenses = ['Public Domain']
25
+
26
+ self.spec_extras = {
27
+ required_ruby_version: '>= 2.2.2'
28
+ }
29
+
30
+ end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ require 'pp'
4
+ require 'logger'
5
+
6
+
7
+ require 'csvreader'
8
+
9
+ ## our own code
10
+ require 'csvhuman/version' # note: let version always go first
11
+ require 'csvhuman/reader'
12
+
13
+
14
+
15
+ ## alternative shortcut aliases - CsvHum, CSV_HXL, CSVHXL and HXL all refer to the CsvHuman class
16
+ CsvHum = CsvHuman
17
+ CSV_HXL = CsvHuman
18
+ CSVHXL = CsvHuman
19
+ HXL = CsvHuman
20
+
21
+
22
+
23
+ # say hello - print the version banner, but only if $DEBUG or $RUBYCOCO_DEBUG is set
24
+ puts CsvHuman.banner if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
@@ -0,0 +1,197 @@
1
+ # encoding: utf-8
2
+
3
+
4
## Reader for tabular data with a hashtag (meta data) line
##  (CSV Humanitarian eXchange Language - HXL).
##
##  Rows are consumed in order:
##   - every row before the first row that has a cell starting with '#'
##     is collected in #header
##   - the first row with a cell starting with '#' is the tag row (kept in #tags)
##   - every later row is a data row and gets yielded as a hash keyed by tag
##
## Q: how to deal with un-tagged fields?
##  - skip / ignore
## Q: how to deal with duplicate fields (e.g. #sex+#targeted,#sex+#targeted)?
##  - value (auto-magically) turned into an array / list
class CsvHuman

  include Enumerable

  ## Open the file at path with Csv.open (using the "human" parser)
  ##  and wrap it in a reader.
  ##
  ##  With a block: yields the reader, always closes the csv afterwards
  ##   and returns the block's value  (like Ruby's open(), not like the (old old) CSV library).
  ##  Without a block: returns the reader;
  ##   note: the caller is responsible for the underlying file then.
  def self.open( path, mode=nil, &block )   ## rename path to filename or name - why? why not?
    ## note: mode gets passed along as is (nil if not passed in)
    csv   = Csv.open( path, mode, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    human = new( csv )

    return human   unless block_given?

    begin
      block.call( human )
    ensure
      csv.close
    end
  end # method self.open


  ## Read all data rows of the file at path; returns an array of hashes.
  def self.read( path )
    open( path ) { |human| human.read }
  end


  ## Yield every data row (hash) of the file at path to the block
  ##  and close the csv afterwards.
  ##  Without a block an enumerator gets returned.
  def self.foreach( path, &block )
    csv   = Csv.open( path, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    human = new( csv )

    if block_given?
      begin
        human.each( &block )
      ensure
        csv.close
      end
    else
      human.to_enum    ## note: caller (responsible) must close file!!!
      ## remove version without block given - why? why not?
      ## use Csv.open().to_enum  or Csv.open().each
      ##   or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
    end
  end # method self.foreach


  ## Parse a string or any object responding to each (yielding rows of values).
  ##  With a block every data row (hash) gets yielded;
  ##  without a block all data rows get returned as an array.
  def self.parse( str_or_readable, &block )
    human = new( str_or_readable )

    if block_given?
      human.each( &block )  ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
    else  # slurp contents, if no block is given
      human.read            ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
    end
  end # method self.parse



  ## Column mapping for one cell of the tag row.
  class Column
    attr_reader :tag

    ## tag  - normalized tag (or nil for an untagged column)
    ## list - true if the tag is used by more than one column
    def initialize( tag=nil, list: false )
      @tag  = tag
      @list = list
    end

    def tagged?() !@tag.nil?; end
    def list?()   @list; end
  end # class Column



  ## header - rows seen before the tag row (filled by each)
  ## tags   - the tag row as read (nil until each has found it)
  attr_reader :header, :tags

  ## recs_or_stream - a string (wrapped with Csv.new and the "human" parser)
  ##                  or any object with an each method yielding rows (arrays of values)
  def initialize( recs_or_stream )
    ## todo/check: check if arg is a stream/enumerator - why? why not??
    if recs_or_stream.is_a?( String )
      @recs = Csv.new( recs_or_stream, parser: Csv::Parser.human )   ## note: returns an enumerator-like object
    else
      @recs = recs_or_stream
    end

    @header = []
    @tags   = nil   ## add tags = nil  -- why? why not?
    @cols   = nil   ## column mappings (used "internally")
  end


  ## Yield every data row as a hash (tag => value).
  ##  Values of tags used by more than one column are collected in an array;
  ##  untagged columns are skipped.
  ##  Returns an enumerator if no block is given
  ##   (instead of failing on the first data row).
  def each( &block )
    return to_enum( :each )   unless block_given?

    ## reset - every run starts from scratch
    @header = []
    @tags   = nil
    @cols   = nil

    @recs.each do |values|
      if @cols.nil?
        ## still looking for the tag row
        if values.any? { |value| value && value.start_with?('#') }
          @cols = build_cols( values )
          @tags = values
        else
          @header << values
        end
      else
        block.call( build_record( values ) )
      end
    end
  end # method each

  def read() to_a; end # method read


  ###
  ### todo/fix:
  ##   add closed? and close
  ##   if self.open used without block (user needs to close file "manually")


  ####
  # helpers

  ## Turn the cells of the tag row into column mappings (one Column per cell).
  ##  Tags get "normalized", that is, leading and trailing spaces
  ##  and all '#' get removed; nil or blank cells result in untagged columns.
  def build_cols( values )
    ## todo: sort attributes by a-to-z
    ##       strip / remove all spaces
    tags = values.map do |value|
      next nil  if value.nil?      ## keep (nil) as is

      ## note: strip first, then check for empty - otherwise a blank cell
      ##         (or a lonely '#') ends up as a column tagged with ""
      tag = value.strip.gsub( '#', '' )   ## remove leading # - why? why not?
      tag.empty? ? nil : tag              ## make untagged fields nil
    end

    ## count how often every tag is used (more than once => list)
    counts = Hash.new( 0 )
    tags.each { |tag| counts[tag] += 1  if tag }

    tags.map do |tag|
      if tag
        Column.new( tag, list: counts[tag] > 1 )
      else
        Column.new
      end
    end
  end


  private

  ## Build the hash (tag => value) for one data row using the column mappings.
  def build_record( values )
    record = {}
    @cols.each_with_index do |col,i|
      next  unless col.tagged?      ## strip non-tagged

      if col.list?
        (record[ col.tag ] ||= []) << values[i]
      else
        ## add "regular" single value
        record[ col.tag ] = values[i]
      end
    end
    record
  end

end # class CsvHuman
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+
4
## Version info and basic "meta" helpers for the csvhuman library.
class CsvHuman

  MAJOR = 0
  MINOR = 1
  PATCH = 0
  VERSION = [MAJOR,MINOR,PATCH].join('.')


  ## returns the version string (e.g. "0.1.0")
  def self.version
    VERSION
  end

  ## returns a one-line banner with the library version plus
  ##  the Ruby version, release date and platform
  def self.banner
    "csvhuman/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  end

  ## returns the (expanded) path three directory levels up from this file
  ##  note: File.expand_path already returns a string - no interpolation needed
  def self.root
    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
  end

end # class CsvHuman
@@ -0,0 +1,7 @@
1
+ What,,,Who,Where,For whom,
2
+ Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
3
+ ,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
4
+ 001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
5
+ 002,Health,Subsector 2,Org 2,Country 2,,,Region 2
6
+ 003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
7
+ 004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
@@ -0,0 +1,17 @@
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+ ## minitest setup
4
+
5
+ require 'minitest/autorun'
6
+
7
+
8
+ ## our own code
9
+ require 'csvhuman'
10
+
11
+
12
+ ## test helper - CsvHuman.test_data_dir returns the test/data directory below CsvHuman.root
13
+ class CsvHuman
14
+ def self.test_data_dir
15
+ "#{root}/test/data"
16
+ end
17
+ end
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_reader.rb
6
+
7
+
8
+ require 'helper'
9
+
10
## Smoke test for the reader - runs the samples from the readme.
##  note: uses Minitest::Test (the old MiniTest constant is no longer
##         defined by default in newer minitest versions)
class TestReader < Minitest::Test

  ## sample records (array of arrays): header row, tag row and four data rows
  def recs
    [["Organisation", "Cluster", "Province" ],
     [ "#org", "#sector", "#adm1" ],
     [ "Org A", "WASH", "Coastal Province" ],
     [ "Org B", "Health", "Mountain Province" ],
     [ "Org C", "Education", "Coastal Province" ],
     [ "Org A", "WASH", "Plains Province" ]]
  end

  ## the data rows of recs as hashes keyed by tag (without the leading #)
  def recs_expected
    [{ "org" => "Org A", "sector" => "WASH",      "adm1" => "Coastal Province" },
     { "org" => "Org B", "sector" => "Health",    "adm1" => "Mountain Province" },
     { "org" => "Org C", "sector" => "Education", "adm1" => "Coastal Province" },
     { "org" => "Org A", "sector" => "WASH",      "adm1" => "Plains Province" }]
  end

  ## sample text (same content as test/data/test.csv)
  def txt
    <<TXT
What,,,Who,Where,For whom,
Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
002,Health,Subsector 2,Org 2,Country 2,,,Region 2
003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
TXT
  end


  def test_readme
    csv = CsvHuman.new( recs )
    csv.each do |rec|
      pp rec
    end

    ## records passed in as arrays need no csv parser - results can be checked as is
    assert_equal [["Organisation", "Cluster", "Province"]], csv.header
    assert_equal ["#org", "#sector", "#adm1"],              csv.tags
    assert_equal recs_expected, csv.read
    assert_equal recs_expected, CsvHuman.parse( recs )

    CsvHuman.parse( recs ).each do |rec|
      pp rec
    end


    ## note: the values read from text depend on the csv parser - print only (no asserts)
    pp CsvHuman.read( "#{CsvHuman.test_data_dir}/test.csv" )
    pp CsvHuman.parse( txt )
    CsvHuman.parse( txt ).each do |rec|
      pp rec
    end

    CsvHuman.foreach( "#{CsvHuman.test_data_dir}/test.csv" ) do |rec|
      pp rec
    end
  end

end # class TestReader
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csvhuman
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csvreader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.2.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '4.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: hoe
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.16'
55
+ description: csvhuman - read tabular data in the CSV Humanitarian eXchange Language
56
+ (HXL) format, that is, comma-separated values (CSV) line-by-line records with a
57
+ hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
58
+ email: wwwmake@googlegroups.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files:
62
+ - HISTORY.md
63
+ - Manifest.txt
64
+ - README.md
65
+ files:
66
+ - HISTORY.md
67
+ - Manifest.txt
68
+ - README.md
69
+ - Rakefile
70
+ - lib/csvhuman.rb
71
+ - lib/csvhuman/reader.rb
72
+ - lib/csvhuman/version.rb
73
+ - test/data/test.csv
74
+ - test/helper.rb
75
+ - test/test_reader.rb
76
+ homepage: https://github.com/csvreader/csvhuman
77
+ licenses:
78
+ - Public Domain
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options:
82
+ - "--main"
83
+ - README.md
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 2.2.2
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.5.2
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL) format,
102
+ that is, comma-separated values (CSV) line-by-line records with a hashtag (meta
103
+ data) line using the Humanitarian eXchange Language (HXL) rules
104
+ test_files: []