csvobj 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+
2
+ Copyright 2010 Martin Carpenter. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are
5
+ permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ conditions and the following disclaimer.
9
+
10
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list
11
+ of conditions and the following disclaimer in the documentation and/or other materials
12
+ provided with the distribution.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY Martin Carpenter ``AS IS'' AND ANY EXPRESS OR IMPLIED
15
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
16
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Martin Carpenter OR
17
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
22
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
+
24
+ The views and conclusions contained in the software and documentation are those of the
25
+ authors and should not be interpreted as representing official policies, either expressed
26
+ or implied, of Martin Carpenter.
27
+
@@ -0,0 +1,53 @@
1
+
2
+ = CSVobj
3
+
4
+ == Aims
5
+
6
+ CSVobj has two simple aims in the processing of CSV files:
7
+
8
+ * clarity
9
+ * robustness
10
+
11
+ It achieves these aims by automatically instantiating a class with
12
+ dynamic attributes that can be used to refer to each column of the CSV
13
+ by name. Each attribute name is derived in a predictable fashion from
14
+ the column headers (the first row of the CSV).
15
+
16
+ === Clarity
17
+
18
+ Rather than referring programmatically to the "third element of the row"
19
+ ("row[2]") one can simply say "row.last_name".
20
+
21
+ === Robustness
22
+
23
+ Any change to the CSV format (e.g. by inserting a new column between two
24
+ existing columns) will not require a program change: you do not need to
25
+ hunt down all references to "row[2]" and make them "row[3]"; they remain
26
+ "row.last_name".
27
+
28
+ == Example
29
+
30
+ Given the following CSV file:
31
+
32
+ First Name,Last Name
33
+ Cheryl,James
34
+ Sandra,Denton
35
+ Deidra,Roper
36
+
37
+ we can extract just the last names as follows:
38
+
39
+ # Get the gem
40
+ require 'rubygems'
41
+ require 'csvobj'
42
+
43
+ # Subclass because #parse defines methods on the class
44
+ class MyCsv < CSVobj ; end
45
+
46
+ # First command line argument is CSV file to read
47
+ csv_file = File.new( ARGV[0] )
48
+
49
+ # Parse the file, print the "last name" column of each row
50
+ MyCsv.parse(csv_file).each do |row|
51
+ puts row.last_name
52
+ end
53
+
@@ -0,0 +1,40 @@
1
+
2
+ require 'rake'
3
+ require 'rake/rdoctask'
4
+ require 'rake/gempackagetask'
5
+ require 'rake/testtask'
6
+
7
desc 'Default task (test)'
task :default => [:test]

# Runs every Ruby file under test/ with interpreter warnings enabled.
desc 'Run unit tests'
Rake::TestTask.new('test') do |test|
  test.pattern = 'test/*.rb'
  test.warning = true
end

# Packaging tasks (gem and tarball), driven by the gemspec.
# NOTE(review): the gemspec is a Ruby file that is eval'd here; this is only
# acceptable because it is the project's own, trusted file.
task :gem
spec = eval( File.read('csvobj.gemspec') )
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# API documentation for the README, the library and the tests.
desc 'Generate rdoc'
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'csvobj.rb'
  rdoc.options << '--line-numbers'
  rdoc.options << '--inline-source'
  rdoc.options << '-A cattr_accessor=object'
  rdoc.options << '--charset' << 'utf-8'
  rdoc.options << '--all'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/csvobj.rb')
  rdoc.rdoc_files.include( Dir.glob( File.join('test', '*.rb') ) )
end

# Removes everything generated under pkg/ and rdoc/.
# rm_r (not rm) is used for pkg/ as well: building the tarball stages the
# package files in a directory below pkg/, and FileUtils.rm refuses to
# remove directories.
desc 'Clean up'
task :clean do
  FileUtils.rm_r( Dir.glob( File.join('pkg', '*') ) )
  FileUtils.rm_r( Dir.glob( File.join('rdoc', '*') ) )
end
@@ -0,0 +1,130 @@
1
+
2
+ require 'csv'
3
+
4
# Generic exception for CSVobj class: actual exceptions are subclassed.
# Derives from StandardError (not Exception) so that a plain +rescue+
# clause catches it, as is conventional for library errors.
class CSVobjException < StandardError ; end

# Raised if the read CSV header row contains a duplicate field.
class CSVobjDuplicateHeader < CSVobjException ; end
9
+
10
# This class represents each row of a CSV file as an object in an array.
# Object attributes are automatically created from the header (first)
# row in the CSV. Note that this class dynamically redefines its own
# attributes and constructor so you should subclass it for any remotely
# serious work.
class CSVobj

  # Parse the given CSV, which may be a multi-line string or IO object
  # (see #parse_s) or an array of row arrays (see #parse_a), and return
  # an array of CSVobjs.
  # Expects the first row to be the headers (these are used for the
  # objects' attributes).
  def self.parse(csv)
    csv.is_a?(Array) ? parse_a(csv) : parse_s(csv)
  end

  # Interpret the given multi-line string or IO object of CSV records
  # and return an array of CSVobjs.
  # Expects the first row to be the headers (these are used for the
  # objects' attributes).
  def self.parse_s(csv)
    parse_a(s_to_a(csv))
  end

  # Interpret an array (rows) of arrays (cells) and return an array of
  # CSVobjs. The given array is not modified.
  # Expects the first row to be the headers (these are used for the
  # objects' attributes).
  # Raises CSVobjDuplicateHeader if two headers derive the same attribute
  # name; the message lists the offending attribute names.
  def self.parse_a(rows)
    return [] if rows.empty?

    # Read the header row without shifting it off, so that the caller's
    # array is left untouched.
    headers = rows.first
    data_rows = rows.drop(1)

    # Derive attribute names and reject clashes. Array#uniq! is of no use
    # for reporting: it returns the de-duplicated list, not the duplicates.
    attr_symbols = headers.map { |header| header_to_attr(header) }
    dupes = attr_symbols.select { |attr| attr_symbols.count(attr) > 1 }.uniq
    unless dupes.empty?
      raise CSVobjDuplicateHeader, "Duplicate derived headers\n#{dupes.inspect}"
    end

    # Declare attributes and constructor. This will take the form:
    # #new(header1, header2, ...). Surplus arguments are ignored and
    # missing ones leave the attribute nil.
    ivar_names = attr_symbols.map { |attr| "@#{attr}" }
    class_eval do
      attr_accessor(*attr_symbols)
      define_method :initialize do |*args|
        ivar_names.zip(args).each do |ivar_name, value|
          instance_variable_set(ivar_name, value)
        end
      end
    end

    # Define class method #headers to return an array of the (unmangled)
    # CSV headers by defining the method on the metaclass.
    (class << self ; self end).send(:define_method, :headers) { headers }

    # Create and return array of CSVobj, one element for each CSV row
    # (minus the headers).
    data_rows.map { |row| new(*row) }
  end

  # Convert a header (string) value to an attribute symbol:
  # remove leading and trailing whitespace;
  # replace (repeated) non-word characters with a single underscore;
  # prefix leading digit with underscore;
  # remove repeated adjacent underscores;
  # convert to a lower-case symbol.
  def self.header_to_attr(header)
    header.strip.           # remove trailing and leading whitespace
      gsub(/\W+/, '_').     # substitute underscore for non-word characters
      sub(/\A(\d)/, '_\1'). # if starts with a digit insert a leading underscore
      gsub(/_+/, '_').      # remove duplicate adjacent underscores
      downcase.             # convert to lower case
      to_sym                # convert to symbol
  end

  # Abstraction to deal with ruby 1.8/1.9 CSV incompatibilities.
  # Takes an IO-like object or a string, returns an array of rows, each
  # an array of cell values.
  # Intended for internal use. It remains publicly callable because the
  # +private+ keyword does not apply to methods defined with +def self.+.
  def self.s_to_a(io_or_string)
    if CSV.const_defined?(:Reader) # ruby 1.8
      CSV::Reader.create( io_or_string ).to_a
    else # ruby 1.9 and beyond
      CSV.parse( io_or_string )
    end
  end

  # Return an array of (string) values for this CSV object in the order
  # that they were given. See dynamically defined method +headers+ to get
  # the equivalent array of headers.
  def to_a
    self.class.headers.map do |header|
      send(self.class.header_to_attr(header))
    end
  end

  # Return a CSV string representing this object. Does not include headers
  # (see #to_s_with_headers).
  def to_s
    a_to_s(to_a)
  end

  # Return a CSV string representing this object including an initial line
  # of headers as originally given on object creation.
  def to_s_with_headers
    a_to_s(self.class.headers) + to_s
  end

  private

  # Abstraction to deal with ruby 1.8/1.9 CSV class incompatibilities.
  # Takes an array of values and returns a CSV-encoded string.
  def a_to_s(a)
    if CSV.const_defined?(:Reader) # ruby 1.8
      (CSV::Writer.generate(s='') << a).close
      s
    else # ruby 1.9 and beyond
      CSV.generate { |csv| csv << a }
    end
  end

end
130
+
@@ -0,0 +1,33 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks each normalisation step that CSVobj.header_to_attr applies when
# turning a raw CSV header into an attribute symbol.
class TestHeaderToAttr < Test::Unit::TestCase

  # Upper-case headers are folded to lower case.
  def test_lowercase
    derived = CSVobj.header_to_attr('HEADER')
    assert_equal(:header, derived)
  end

  # Whitespace before the header text is dropped.
  def test_remove_leading_whitespace
    derived = CSVobj.header_to_attr(' header')
    assert_equal(:header, derived)
  end

  # Whitespace after the header text is dropped.
  def test_remove_trailing_whitespace
    derived = CSVobj.header_to_attr('header ')
    assert_equal(:header, derived)
  end

  # Every non-word character becomes an underscore.
  def test_substitute_underscore_for_non_word_char
    derived = CSVobj.header_to_attr('^hea&der(')
    assert_equal(:_hea_der_, derived)
  end

  # A run of non-word characters yields a single underscore.
  def test_no_repeated_adjacent_underscores
    derived = CSVobj.header_to_attr('hea^&*der')
    assert_equal(:hea_der, derived)
  end

  # A header starting with a digit gains a leading underscore.
  def test_leading_digit
    derived = CSVobj.header_to_attr('9header')
    assert_equal(:_9header, derived)
  end

end
33
+
@@ -0,0 +1,29 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks the class-level +headers+ method that parsing defines: it must
# return the header row exactly as it appeared in the CSV.
class TestHeaders < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  # Plain headers come back unchanged. The parse result itself is not
  # needed here, so it is not assigned (avoids an unused-variable warning).
  def test_simple
    @parser.parse('foo,bar,baz')
    assert_equal(%w[ foo bar baz ], @parser.headers)
  end

  # Headers containing spaces and punctuation are reported as given, not
  # in their derived attribute-name form.
  def test_not_mangled
    @parser.parse('Header one,!Header two')
    assert_equal([ 'Header one', '!Header two' ], @parser.headers)
  end

end
29
+
@@ -0,0 +1,118 @@
1
+
2
+ require 'test/unit'
3
+ require 'stringio'
4
+
5
+ require 'csvobj'
6
+
7
# Exercises CSVobj.parse with string, IO-like and array input, including
# ragged rows, duplicate headers and empty input.
class TestParse < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  # Two headers deriving the same attribute name must be rejected.
  def test_duplicate_header
    s = "foo,bar,bar,baz\n1,2,3,4\n"
    assert_raise CSVobjDuplicateHeader do
      @parser.parse(s)
    end
  end

  def test_one_obj_from_string
    objs = @parser.parse("foo,bar,baz\n1,2,3\n")
    assert_equal(1, objs.size)
    assert_cells(%w[ 1 2 3 ], objs[0])
  end

  def test_two_objs_from_string
    objs = @parser.parse("foo,bar,baz\n1,2,3\n4,5,6\n")
    assert_equal(2, objs.size)
    assert_cells(%w[ 1 2 3 ], objs[0])
    assert_cells(%w[ 4 5 6 ], objs[1])
  end

  # An IO-like object is accepted in place of a string.
  def test_two_objs_from_file
    fileish = StringIO.new("foo,bar,baz\n1,2,3\n4,5,6\n")
    objs = @parser.parse(fileish)
    assert_equal(2, objs.size)
    assert_cells(%w[ 1 2 3 ], objs[0])
    assert_cells(%w[ 4 5 6 ], objs[1])
  end

  def test_no_trailing_newline
    objs = @parser.parse("foo,bar,baz\n1,2,3")
    assert_equal(1, objs.size)
    assert_cells(%w[ 1 2 3 ], objs[0])
  end

  # A short data row leaves the trailing attribute nil.
  def test_missing_data_cell
    objs = @parser.parse("foo,bar,baz\n1,2\n4,5,6\n")
    assert_equal(2, objs.size)
    assert_equal('1', objs[0].foo)
    assert_equal('2', objs[0].bar)
    assert_nil(objs[0].baz)
    assert_cells(%w[ 4 5 6 ], objs[1])
  end

  # A data cell with no corresponding header gets no accessor.
  def test_missing_header_cell
    objs = @parser.parse("foo,bar\n1,2,3\n")
    assert_equal(1, objs.size)
    assert_equal('1', objs[0].foo)
    assert_equal('2', objs[0].bar)
    assert_raise NoMethodError do
      objs[0].baz
    end
  end

  # Headers only: no objects.
  def test_missing_data
    objs = @parser.parse("foo,bar,baz\n")
    assert_equal([], objs)
  end

  def test_empty
    objs = @parser.parse('')
    assert_equal([], objs)
  end

  # An array of row arrays is accepted in place of CSV text.
  def test_array
    a = [
      [ 'foo', 'bar', 'baz' ],
      [ '1', '2', '3' ],
      [ '4', '5', '6' ]
    ]
    objs = @parser.parse(a)
    assert_equal(2, objs.size)
    assert_cells(%w[ 1 2 3 ], objs[0])
    assert_cells(%w[ 4 5 6 ], objs[1])
  end

  private

  # Assert the foo, bar and baz attribute values of one parsed row object.
  def assert_cells(expected, obj)
    assert_equal(expected, [ obj.foo, obj.bar, obj.baz ])
  end

end
118
+
@@ -0,0 +1,38 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks CSVobj#to_a: values come back in header order, with nil for
# absent cells and without cells that have no header.
class TestToA < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  def test_one_complete_object
    parsed = @parser.parse("foo,bar,baz\n1,2,3\n")
    assert_equal(1, parsed.size)
    values = parsed[0].to_a
    assert_equal(%w[ 1 2 3 ], values)
  end

  # The missing third cell is reported as nil.
  def test_one_object_missing_data_cell
    parsed = @parser.parse("foo,bar,baz\n1,2\n")
    assert_equal(1, parsed.size)
    values = parsed[0].to_a
    assert_equal([ '1', '2', nil ], values)
  end

  # The third cell has no header and so does not appear.
  def test_one_object_missing_header
    parsed = @parser.parse("foo,bar\n1,2,3\n")
    assert_equal(1, parsed.size)
    values = parsed[0].to_a
    assert_equal(%w[ 1 2 ], values)
  end

end
38
+
@@ -0,0 +1,31 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks conversion of a parsed object back to CSV text, with and without
# the header line.
class TestToCsv < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  # to_s emits only the data row.
  def test_one_object_sans_headers
    csv_text = "foo,bar,baz\n1,2,3\n"
    parsed = @parser.parse(csv_text)
    assert_equal(1, parsed.size)
    assert_equal("1,2,3\n", parsed.first.to_s)
  end

  # to_s_with_headers round-trips the original single-row CSV.
  def test_one_object_with_headers
    csv_text = "foo,bar,baz\n1,2,3\n"
    parsed = @parser.parse(csv_text)
    assert_equal(1, parsed.size)
    assert_equal(csv_text, parsed.first.to_s_with_headers)
  end

end
31
+
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csvobj
3
+ version: !ruby/object:Gem::Version
4
+ version: "1.0"
5
+ platform: ruby
6
+ authors:
7
+ - Martin Carpenter
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-11-25 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: CSVobj provides a legible and maintainable mechanism to manipulate CSV files by creating an array of objects from a file or string of CSV information. The resulting object's attributes are defined dynamically and are based on the CSV column name.
17
+ email: martin.carpenter@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - Rakefile
25
+ - README.rdoc
26
+ files:
27
+ - lib/csvobj.rb
28
+ - test/test_headers.rb
29
+ - test/test_parse.rb
30
+ - test/test_header_to_attr.rb
31
+ - test/test_to_a.rb
32
+ - test/test_to_csv.rb
33
+ - LICENSE
34
+ - Rakefile
35
+ - README.rdoc
36
+ has_rdoc: true
37
+ homepage: http://mcarpenter.org/projects/csvobj
38
+ licenses:
39
+ - BSD
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.3.5
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Convert CSV files to an array of objects with friendly "column name" attributes
64
+ test_files: []
65
+