csvobj 1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+
2
+ Copyright 2010 Martin Carpenter. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are
5
+ permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ conditions and the following disclaimer.
9
+
10
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list
11
+ of conditions and the following disclaimer in the documentation and/or other materials
12
+ provided with the distribution.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY Martin Carpenter ``AS IS'' AND ANY EXPRESS OR IMPLIED
15
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
16
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Martin Carpenter OR
17
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
22
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
+
24
+ The views and conclusions contained in the software and documentation are those of the
25
+ authors and should not be interpreted as representing official policies, either expressed
26
+ or implied, of Martin Carpenter.
27
+
@@ -0,0 +1,53 @@
1
+
2
+ = CSVobj
3
+
4
+ == Aims
5
+
6
+ CSVobj has two simple aims in the processing of CSV files:
7
+
8
+ * clarity
9
+ * robustness
10
+
11
+ It achieves these aims by automatically instantiating a class with
12
+ dynamic attributes that can be used to refer to each column of the CSV
13
+ by name. Each attribute name is derived in a predictable fashion from
14
+ the column headers (the first row of the CSV).
15
+
16
+ === Clarity
17
+
18
+ Rather than referring programmatically to the "third element of the row"
19
+ ("row[2]") one can simply say "row.last_name".
20
+
21
+ === Robustness
22
+
23
+ Any change to the CSV format (e.g. by inserting a new column between two
24
+ existing columns) will not require a program change: you do not need to
25
+ hunt down all references to "row[2]" and make them "row[3]"; they remain
26
+ "row.last_name".
27
+
28
+ == Example
29
+
30
+ Given the following CSV file:
31
+
32
+ First Name,Last Name
33
+ Cheryl,James
34
+ Sandra,Denton
35
+ Deidra,Roper
36
+
37
+ we can extract just the last names as follows:
38
+
39
+ # Get the gem
40
+ require 'rubygems'
41
+ require 'csvobj'
42
+
43
+ # Subclass because #parse defines methods on the class
44
+ class MyCsv < CSVobj ; end
45
+
46
+ # First command line argument is CSV file to read
47
+ csv_file = File.new( ARGV[0] )
48
+
49
+ # Parse the file, print the "last name" column of each row
50
+ MyCsv.parse(csv_file).each do |row|
51
+ puts row.last_name
52
+ end
53
+
@@ -0,0 +1,40 @@
1
+
2
+ require 'rake'
3
+ require 'rake/rdoctask'
4
+ require 'rake/gempackagetask'
5
+ require 'rake/testtask'
6
+
7
# Running plain `rake` runs the unit tests.
desc 'Default task (test)'
task :default => [:test]

# Runs every Ruby file under test/ with interpreter warnings enabled.
desc 'Run unit tests'
Rake::TestTask.new('test') do |test|
  test.pattern = 'test/*.rb'
  test.warning = true
end

# Package tasks are built from the gemspec that lives next to this
# Rakefile. The gemspec is evaluated as Ruby code, so it must be a
# trusted, project-owned file.
task :gem
spec = eval( File.read('csvobj.gemspec') )
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# Generates API documentation for the README, the library and the tests
# into the rdoc/ directory.
desc 'Generate rdoc'
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'csvobj.rb'
  rdoc.options << '--line-numbers'
  rdoc.options << '--inline-source'
  rdoc.options << '-A cattr_accessor=object'
  rdoc.options << '--charset' << 'utf-8'
  rdoc.options << '--all'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/csvobj.rb')
  rdoc.rdoc_files.include( Dir.glob( File.join('test', '*.rb') ) )
end

# Removes everything generated under pkg/ and rdoc/.
desc 'Clean up'
task :clean do
  # pkg/ may contain a staging directory as well as archives (the tar
  # package is assembled from one), so remove recursively: a plain rm
  # raises when handed a directory.
  FileUtils.rm_r( Dir.glob( File.join('pkg', '*') ) )
  FileUtils.rm_r( Dir.glob( File.join('rdoc', '*') ) )
end
@@ -0,0 +1,130 @@
1
+
2
+ require 'csv'
3
+
4
# Generic exception for CSVobj class: actual exceptions are subclassed.
# Derives from StandardError (rather than Exception) so that an ordinary
# +rescue+ clause catches it without also trapping interrupts or exits.
class CSVobjException < StandardError ; end

# Raised if the read CSV header row contains a duplicate field.
class CSVobjDuplicateHeader < CSVobjException ; end

# This class represents each row of a CSV file as an object in an array.
# Object attributes are automatically created from the header (first)
# row in the CSV. Note that this class dynamically redefines its own
# attributes and constructor so you should subclass it for any remotely
# serious work.
class CSVobj

  # Parse the given CSV, which may be a multi-line string or
  # IO object (see #parse_s) or an array of row arrays (see
  # #parse_a) and return an array of CSVobjs.
  # Expects the first row to be the headers (these are used
  # for the objects' attributes).
  def self.parse(csv)
    csv.is_a?(Array) ? parse_a(csv) : parse_s(csv)
  end

  # Interpret the given multi-line string or IO object of
  # CSV records and return an array of CSVobjs.
  # Expects the first row to be the headers (these are used
  # for the objects' attributes).
  def self.parse_s(csv)
    parse_a(s_to_a(csv))
  end

  # Interpret an array (rows) of arrays (cells) and return
  # an array of CSVobjs, one per row after the first.
  # Expects the first row to be the headers (these are used
  # for the objects' attributes). The given array is not modified.
  # Returns an empty array if +rows+ is empty.
  # Raises CSVobjDuplicateHeader if two headers map to the same
  # attribute name (see #header_to_attr).
  def self.parse_a(rows)
    return [] if rows.empty?

    # Split headers from data without shifting the caller's array.
    headers = rows.first
    data = rows[1..-1]

    # Get attributes from first row and refuse ambiguous ones, naming
    # exactly those derived attributes that occur more than once.
    attr_symbols = headers.map { |header| header_to_attr(header) }
    dupes = attr_symbols.select { |attr| attr_symbols.count(attr) > 1 }.uniq
    unless dupes.empty?
      raise CSVobjDuplicateHeader, "Duplicate derived headers\n#{dupes.inspect}"
    end

    # Declare attributes and constructor. This will take the form:
    # #new(header1, header2, ...).
    # Missing trailing arguments leave the attribute nil; surplus
    # arguments are ignored (zip stops at the last known attribute).
    ivar_names = attr_symbols.map { |attr| "@#{attr}" }
    class_eval do
      attr_accessor(*attr_symbols)
      define_method :initialize do |*args|
        ivar_names.zip(args).each do |ivar_name, arg|
          instance_variable_set(ivar_name, arg)
        end
      end
    end

    # Define class method #headers to return an array of the (unmangled)
    # CSV headers by defining the method on the metaclass.
    (class << self ; self end).send(:define_method, :headers) { headers }

    # Create and return array of CSVobj, one element for each CSV row
    # (minus the headers).
    data.map { |row| new(*row) }
  end

  # Convert a header (string) value to an attribute symbol:
  # Remove leading and trailing whitespace;
  # replace (repeated) non-word characters with a single underscore;
  # prefix leading digit with underscore;
  # remove repeated adjacent underscores;
  # convert to a lower-case symbol.
  def self.header_to_attr(header)
    header.strip.           # remove trailing and leading whitespace
      gsub(/\W+/, '_').     # substitute underscore for non-word characters
      sub(/\A(\d)/, '_\1'). # if starts with a digit insert a leading underscore
      gsub(/_+/, '_').      # remove duplicate adjacent underscores
      downcase.             # convert to lower case
      to_sym                # convert to symbol
  end

  # Abstraction to deal with ruby 1.8/1.9 CSV incompatibilities.
  # Takes an IO-like object or a string, returns an array of row arrays.
  # Intended as an internal helper. It is nevertheless publicly callable
  # (the +private+ keyword never applies to +def self.+ methods) and is
  # left that way so existing callers keep working.
  def self.s_to_a(io_or_string)
    if CSV.const_defined?(:Reader) # ruby 1.8
      CSV::Reader.create( io_or_string ).to_a
    else # ruby 1.9 and beyond
      CSV.parse( io_or_string )
    end
  end

  # Return an array of (string) values for this CSV object in the order
  # that they were given. See dynamically defined method +headers+ to get
  # the equivalent array of headers.
  def to_a
    self.class.headers.map do |header|
      send(self.class.header_to_attr(header))
    end
  end

  # Return a CSV string representing this object. Does not include headers
  # (see #to_s_with_headers).
  def to_s
    a_to_s(to_a)
  end

  # Return a CSV string representing this object including an initial line
  # of headers as originally given on object creation.
  def to_s_with_headers
    a_to_s(self.class.headers) + to_s
  end

  private

  # Abstraction to deal with ruby 1.8/1.9 CSV class incompatibilities.
  # Takes an array of values and returns a CSV-encoded string.
  def a_to_s(a)
    if CSV.const_defined?(:Reader) # ruby 1.8
      (CSV::Writer.generate(s='') << a).close
      s
    else # ruby 1.9 and beyond
      CSV.generate { |csv| csv << a }
    end
  end

end
130
+
@@ -0,0 +1,33 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks each normalisation step that CSVobj.header_to_attr applies when
# turning a CSV header string into an attribute symbol.
class TestHeaderToAttr < Test::Unit::TestCase

  # Upper-case letters are folded to lower case.
  def test_lowercase
    assert_equal(:header, derive('HEADER'))
  end

  # Whitespace before the header text is dropped.
  def test_remove_leading_whitespace
    assert_equal(:header, derive(' header'))
  end

  # Whitespace after the header text is dropped.
  def test_remove_trailing_whitespace
    assert_equal(:header, derive('header '))
  end

  # Each non-word character becomes an underscore.
  def test_substitute_underscore_for_non_word_char
    assert_equal(:_hea_der_, derive('^hea&der('))
  end

  # A run of non-word characters collapses to a single underscore.
  def test_no_repeated_adjacent_underscores
    assert_equal(:hea_der, derive('hea^&*der'))
  end

  # A header starting with a digit gains a leading underscore.
  def test_leading_digit
    assert_equal(:_9header, derive('9header'))
  end

  private

  # Shorthand for the conversion under test.
  def derive(header)
    CSVobj.header_to_attr(header)
  end

end
33
+
@@ -0,0 +1,29 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks the class-level +headers+ method that parsing defines: it must
# return the header row exactly as it appeared in the CSV.
class TestHeaders < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  # Plain headers come back as given.
  def test_simple
    # Only the side effect of parsing matters here, so the returned
    # objects are deliberately not kept (avoids an unused-variable
    # warning when tests run with warnings enabled).
    @parser.parse('foo,bar,baz')
    assert_equal(%w[ foo bar baz ], @parser.headers)
  end

  # Headers are reported verbatim, not in their attribute-name form.
  def test_not_mangled
    @parser.parse('Header one,!Header two')
    assert_equal([ 'Header one', '!Header two' ], @parser.headers)
  end

end
29
+
@@ -0,0 +1,118 @@
1
+
2
+ require 'test/unit'
3
+ require 'stringio'
4
+
5
+ require 'csvobj'
6
+
7
# Exercises CSVobj.parse with string, IO-like and array input, including
# ragged rows, missing data and duplicate headers.
class TestParse < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  # Two identical headers cannot both become attributes.
  def test_duplicate_header
    s = "foo,bar,bar,baz\n1,2,3,4\n"
    assert_raise CSVobjDuplicateHeader do
      @parser.parse(s)
    end
  end

  def test_one_obj_from_string
    objs = @parser.parse("foo,bar,baz\n1,2,3\n")
    assert_equal(1, objs.size)
    assert_foo_bar_baz(%w[ 1 2 3 ], objs[0])
  end

  def test_two_objs_from_string
    objs = @parser.parse("foo,bar,baz\n1,2,3\n4,5,6\n")
    assert_equal(2, objs.size)
    assert_foo_bar_baz(%w[ 1 2 3 ], objs[0])
    assert_foo_bar_baz(%w[ 4 5 6 ], objs[1])
  end

  # An IO-like object is accepted in place of a string.
  def test_two_objs_from_file
    fileish = StringIO.new("foo,bar,baz\n1,2,3\n4,5,6\n")
    objs = @parser.parse(fileish)
    assert_equal(2, objs.size)
    assert_foo_bar_baz(%w[ 1 2 3 ], objs[0])
    assert_foo_bar_baz(%w[ 4 5 6 ], objs[1])
  end

  def test_no_trailing_newline
    objs = @parser.parse("foo,bar,baz\n1,2,3")
    assert_equal(1, objs.size)
    assert_foo_bar_baz(%w[ 1 2 3 ], objs[0])
  end

  # A short row leaves the trailing attribute nil.
  def test_missing_data_cell
    objs = @parser.parse("foo,bar,baz\n1,2\n4,5,6\n")
    assert_equal(2, objs.size)
    assert_equal('1', objs[0].foo)
    assert_equal('2', objs[0].bar)
    assert_nil(objs[0].baz)
    assert_foo_bar_baz(%w[ 4 5 6 ], objs[1])
  end

  # A cell with no matching header gets no attribute at all.
  def test_missing_header_cell
    objs = @parser.parse("foo,bar\n1,2,3\n")
    assert_equal(1, objs.size)
    assert_equal('1', objs[0].foo)
    assert_equal('2', objs[0].bar)
    assert_raise NoMethodError do
      objs[0].baz
    end
  end

  # Headers without any data rows yield no objects.
  def test_missing_data
    objs = @parser.parse("foo,bar,baz\n")
    assert_equal([], objs)
  end

  def test_empty
    objs = @parser.parse('')
    assert_equal([], objs)
  end

  # An array of row arrays is accepted in place of CSV text.
  def test_array
    a = [
      [ 'foo', 'bar', 'baz' ],
      [ '1', '2', '3' ],
      [ '4', '5', '6' ]
    ]
    objs = @parser.parse(a)
    assert_equal(2, objs.size)
    assert_foo_bar_baz(%w[ 1 2 3 ], objs[0])
    assert_foo_bar_baz(%w[ 4 5 6 ], objs[1])
  end

  private

  # Assert that +obj+ returns the three +expected+ values, in order, from
  # its foo, bar and baz readers.
  def assert_foo_bar_baz(expected, obj)
    assert_equal(expected, [ obj.foo, obj.bar, obj.baz ])
  end

end
118
+
@@ -0,0 +1,38 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks CSVobj#to_a: the values of a parsed row, in header order.
class TestToA < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  # Every header has a value.
  def test_one_complete_object
    parsed = @parser.parse("foo,bar,baz\n1,2,3\n")
    assert_equal(1, parsed.size)
    values = parsed[0].to_a
    assert_equal(%w[ 1 2 3 ], values)
  end

  # A short row pads the missing trailing value with nil.
  def test_one_object_missing_data_cell
    parsed = @parser.parse("foo,bar,baz\n1,2\n")
    assert_equal(1, parsed.size)
    values = parsed[0].to_a
    assert_equal([ '1', '2', nil ], values)
  end

  # A value with no header is not reported.
  def test_one_object_missing_header
    parsed = @parser.parse("foo,bar\n1,2,3\n")
    assert_equal(1, parsed.size)
    values = parsed[0].to_a
    assert_equal(%w[ 1 2 ], values)
  end

end
38
+
@@ -0,0 +1,31 @@
1
+
2
+ require 'test/unit'
3
+
4
+ require 'csvobj'
5
+
6
# Checks that a parsed row can be turned back into CSV text, with and
# without the header line.
class TestToCsv < Test::Unit::TestCase

  # Use a fresh anonymous subclass per test because parsing defines
  # methods on the class it is called on.
  def setup
    @parser = Class.new(CSVobj)
  end

  def teardown
    @parser = nil
  end

  # #to_s emits only the data line.
  def test_one_object_sans_headers
    csv_text = "foo,bar,baz\n1,2,3\n"
    parsed = @parser.parse(csv_text)
    assert_equal(1, parsed.size)
    row = parsed.first
    assert_equal("1,2,3\n", row.to_s)
  end

  # #to_s_with_headers reproduces the original two-line input.
  def test_one_object_with_headers
    csv_text = "foo,bar,baz\n1,2,3\n"
    parsed = @parser.parse(csv_text)
    assert_equal(1, parsed.size)
    row = parsed.first
    assert_equal(csv_text, row.to_s_with_headers)
  end

end
31
+
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csvobj
3
+ version: !ruby/object:Gem::Version
4
+ version: "1.0"
5
+ platform: ruby
6
+ authors:
7
+ - Martin Carpenter
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-11-25 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: CSVobj provides a legible and maintainable mechanism to manipulate CSV files by creating an array of objects from a file or string of CSV information. The resulting object's attributes are defined dynamically and are based on the CSV column name.
17
+ email: martin.carpenter@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - Rakefile
25
+ - README.rdoc
26
+ files:
27
+ - lib/csvobj.rb
28
+ - test/test_headers.rb
29
+ - test/test_parse.rb
30
+ - test/test_header_to_attr.rb
31
+ - test/test_to_a.rb
32
+ - test/test_to_csv.rb
33
+ - LICENSE
34
+ - Rakefile
35
+ - README.rdoc
36
+ has_rdoc: true
37
+ homepage: http://mcarpenter.org/projects/csvobj
38
+ licenses:
39
+ - BSD
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.3.5
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Convert CSV files to an array of objects with friendly "column name" attributes
64
+ test_files: []
65
+