csv-autoparser 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b806b5d4df47de7532a01384fa357b998e53653a
4
- data.tar.gz: 1ad970d72389cb2a061127d69c584ed8be1e0244
3
+ metadata.gz: 202d8f86d22651aeecb94ab9b5a839c1df8d0117
4
+ data.tar.gz: 06df0597fc5d4bd6df4f9f7da5ded7bdda5542c2
5
5
  SHA512:
6
- metadata.gz: 9263abf623d4ffb74ad65ba9243f9fa36e9924ba7787f7c9619bbbf866a3843764ee2f0a1476af6331c16172eed75f948340562c8750ca37b47260b803594b5a
7
- data.tar.gz: 07d207dc0bd482d688d721a714bb4caa8527925f521099f47dfe6bb0cf485645811c6d598a572e24eb86f222868909e0246fcf2040d42d9e5f6f63708f3afedc
6
+ metadata.gz: 409f3c4d1505698a2e248309d061a0c77c98c965081361a058756a215eb5eb3601b5385604065a231482dde5b55af48e4cf6b58d9a4200926c42c1ced7521a90
7
+ data.tar.gz: 9fc72adf4b37cd1b86fbe62e0f47e73731aa7c70963192bc4e6cf4f5afef6feeb71c9673a98c4f0a20a2880d376459f24abaf5a7f6239a9a99a36c310b0924f1
data/README.md CHANGED
@@ -1,6 +1,12 @@
1
1
  # CSV::AutoParser
2
2
 
3
- CSV::AutoParser automatically parses a CSV file given a user specified header row.
3
+ CSV::AutoParser automatically parses a CSV file given a user specified header row and
4
+ adds method style accessors to the CSV::Row data.
5
+
6
+ All functionality of the standard Ruby CSV class is accessible since CSV::AutoParser
7
+ inherits from CSV. CSV::AutoParser objects behave just like CSV objects when not
8
+ provided a block (though method style accessors will still be available on CSV::Row
9
+ objects).
4
10
 
5
11
  ## Installation
6
12
 
@@ -8,18 +14,36 @@ CSV::AutoParser automatically parses a CSV file given a user specified header ro
8
14
 
9
15
  ## Usage
10
16
 
11
- require 'csv/autoparser'
12
-
13
- # ID header row by CSV line number.
14
- csv = CSV::AutoParser.new("my_file.csv") {|csv_line_number, header_row| csv_line_number == 1 }
15
- csv.rows.each {|row| puts row.full_name }
16
-
17
- # -OR- ID header row by column header names.
18
- csv = CSV::AutoParser.new(input_file) do |line_num, header_row|
19
- ["name", "Job title"].all? {|cell| header_row.include?(cell) }
20
- end
21
- puts csv.rows.first.name # => "Jon Smith"
22
- csv.rows.first.job_title # => "blacksmith"
17
+ ```ruby
18
+ require 'csv/autoparser'
19
+
20
+ data = <<CSV
21
+ "this is",not,"the header"
22
+ "the real header","is easy","to find"
23
+ name,"Job title",age
24
+ "Jon Smith",blacksmith,55
25
+ "Jimmy Johnson",farmer,34
26
+ "Kimmy Kimmson","pig wrangler",29
27
+ CSV
28
+
29
+ # ID header row by CSV line number.
30
+ csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
31
+ csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
32
+ # Jon Smith is a 55 year old blacksmith.
33
+ # Jimmy Johnson is a 34 year old farmer.
34
+ # Kimmy Kimmson is a 29 year old pig wrangler.
35
+
36
+ # -OR- ID header row by column header names.
37
+ csv = CSV::AutoParser.new(data) do |line_num, header_row|
38
+ ["name", "Job title"].all? {|field| header_row.include?(field) }
39
+ end
40
+ csv.is_a?(CSV) # => true
41
+ table = csv.read # => CSV::Table
42
+ table.first.name # => "Jon Smith"
43
+ table[-1].job_title # => "pig wrangler"
44
+ ```
45
+
46
+ More usage examples can be seen in examples/.
23
47
 
24
48
  ## Contributing
25
49
 
data/examples/basic.rb ADDED
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
3
+
4
+ require 'csv/autoparser'
5
+
6
+ data = <<CSV
7
+ "this is",not,"the header"
8
+ "the real header","is easy","to find"
9
+ name,"Job title",age
10
+ "Jon Smith",blacksmith,55
11
+ "Jimmy Johnson",farmer,34
12
+ "Kimmy Kimmson","pig wrangler",29
13
+ CSV
14
+
15
+ ########################################
16
+ # Identifying the Header Row
17
+ ########################################
18
+
19
+ # ID header row by CSV line number.
20
+ csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
21
+ csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
22
+ # Jon Smith is a 55 year old blacksmith.
23
+ # Jimmy Johnson is a 34 year old farmer.
24
+ # Kimmy Kimmson is a 29 year old pig wrangler.
25
+
26
+ # -OR- ID header row by column header names.
27
+ csv = CSV::AutoParser.new(data) do |line_num, header_row|
28
+ ["name", "Job title"].all? {|field| header_row.include?(field) }
29
+ end
30
+ csv.is_a?(CSV) # => true
31
+ table = csv.read # => CSV::Table
32
+ table.first.name # => "Jon Smith"
33
+ table[-1].job_title # => "pig wrangler"
34
+
35
+
36
+ ################################################
37
+ # Want to get at the data above the header row?
38
+ ################################################
39
+
40
+ csv = CSV::AutoParser.new(File.expand_path("../../test/fixtures/persons.csv", __FILE__)) {|lineno, hr| lineno == 3 }
41
+ csv.pre_header_rows.each do |row|
42
+ puts "#{row.file}(#{row.line}): #{row.inspect}"
43
+ end
44
+
45
+
46
+ ########################################
47
+ # Optional Header Columns
48
+ ########################################
49
+
50
+ data2 = <<CSV
51
+ name,"city name","Job title"
52
+ "Jon Smith",Sacramento,blacksmith
53
+ "Jimmy Johnson","San Diego",farmer
54
+ "Kimmy Kimmson",Austin,"pig wrangler"
55
+ CSV
56
+
57
+ # When CSV columns are optional, specify it through options so that a NoMethodError is not raised.
58
+ def demo_optional_header_columns data
59
+ csv = CSV::AutoParser.new(data, optional_headers: ["City Name", :age]) {|lineno, hr| hr.include? "name" }
60
+ csv.each do |row|
61
+ # row.birth_date # => raises NoMethodError
62
+ if row.city_name
63
+ puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title} living in #{row.city_name}."
64
+ else
65
+ puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title}."
66
+ end
67
+ end
68
+ end
69
+
70
+ demo_optional_header_columns(data)
71
+ demo_optional_header_columns(data2)
72
+
@@ -2,46 +2,98 @@ require "csv"
2
2
  require "csv/autoparser/version"
3
3
 
4
4
  class CSV
5
- class AutoParser
6
5
 
7
- class Row < Array
8
- attr_reader :csv_file, :csv_line
6
+ class Row
7
+ alias_method :orig_initialize, :initialize
8
+ # Defines method style accessors based on header row names.
9
+ def initialize(*args)
10
+ orig_initialize(*args)
11
+ if field_row?
12
+ headers.each do |h|
13
+ define_singleton_method(CSV::AutoParser.convert_header_to_method_name(h)) { fetch(h) }
14
+ end
15
+ end
16
+ end
17
+ end
18
+
19
+ class AutoParser < CSV
20
+
21
+ # This is the method called by AutoParser to turn header names into legal method names.
22
+ # Redefine as necessary.
23
+ def self.convert_header_to_method_name header
24
+ header.to_s.downcase.strip.gsub(/\s+/, '_').gsub(/-+/, '_').gsub(/[^\w]/, '').to_sym
25
+ end
26
+
27
+ # The rows found before the header row are paired with file and line information. These
28
+ # objects are available through CSV::AutoParser#pre_header_rows.
29
+ class PreHeaderRow < Array
30
+ attr_reader :file, :line
9
31
  def self.create original_row, file, line
10
- row = Row.new(original_row)
11
- row.instance_eval { @csv_file = file; @csv_line = line }
32
+ row = PreHeaderRow.new(original_row)
33
+ row.instance_eval { @file = file; @line = line }
12
34
  return row
13
35
  end
14
36
  end
15
37
 
16
38
  class HeaderRowNotFound < RuntimeError; end
17
39
 
18
- attr_reader :pre_header_rows, :rows
40
+ attr_reader :pre_header_rows, :header_line_number
19
41
 
20
- def initialize file, &is_header
21
- map = {}
22
- csv_line_number = 0
23
- @rows = []
42
+ # +data+ can be the path of a CSV file, in addition to a CSV String or an IO object as accepted by CSV.new.
43
+ # All CSV.new options are supported via +opts+. If an +&is_header+ block is provided, it
44
+ # takes precedence over the CSV.new +:headers+ option. A +:optional_headers+ option has
45
+ # been added for specifying headers that may not be present in the CSV, but you do not want
46
+ # a NoMethodError to raise when accessing a field using the header method style accessor.
47
+ def initialize data, opts={}, &is_header
48
+ @header_line_number = nil
24
49
  @pre_header_rows = []
25
- CSV.foreach(file) do |row|
26
- csv_line_number += 1
27
- if map.empty?
50
+ @optional_headers = [opts.delete(:optional_headers)].flatten.compact
51
+ if data.is_a?(String) and File.exists?(data)
52
+ file = data
53
+ data = File.open(data)
54
+ end
55
+ if block_given?
56
+ data_io = if data.is_a?(IO)
57
+ data
58
+ elsif data.is_a?(String)
59
+ StringIO.new(data)
60
+ else
61
+ raise ArgumentError, "data must be a path to a CSV file, a CSV formatted String, or an IO object."
62
+ end
63
+ header_pos = data_io.pos
64
+ csv_line_number = 0
65
+ header_finder = CSV.new(data_io, opts.merge(:headers => false)).each do |row|
66
+ csv_line_number += 1
28
67
  if is_header.call(csv_line_number, row)
29
- row.each_index {|index| map[row[index]] = index }
68
+ @header_line_number = csv_line_number
69
+ break
30
70
  else
31
- @pre_header_rows << Row::create(row, file, csv_line_number)
32
- end
33
- else
34
- @rows << Row::create(row, file, csv_line_number)
35
- map.each_pair do |column_name, column_offset|
36
- @rows.last.define_singleton_method(column_to_method_name(column_name)) { self[column_offset] }
71
+ @pre_header_rows << CSV::AutoParser::PreHeaderRow.create(row, file, csv_line_number)
37
72
  end
73
+ header_pos = data_io.pos
38
74
  end
75
+ raise HeaderRowNotFound, "Could not find header row#{file ? " in #{file}" : "" }." if @header_line_number.nil?
76
+ data_io.seek header_pos
77
+ data_io = StringIO.new(data_io.read)
78
+ super(data_io, opts.merge(:headers => true))
79
+ else
80
+ @header_line_number = 1 if opts[:headers] == :first_row or opts[:headers] == true
81
+ super(data, opts)
39
82
  end
40
- raise HeaderRowNotFound, "Could not find header row in #{file}." if map.empty?
41
83
  end
42
84
 
43
- def column_to_method_name name
44
- name.downcase.strip.gsub(/\s+/, '_').gsub(/-+/, '_').gsub(/[^\w]/, '')
85
+ alias_method :orig_shift, :shift
86
+
87
+ # Overridden to add methods for optional headers which were not present in the CSV.
88
+ def shift
89
+ row = orig_shift
90
+ [@optional_headers].flatten.compact.each do |h|
91
+ method_name = self.class.convert_header_to_method_name(h)
92
+ unless row.respond_to? method_name
93
+ row.define_singleton_method(method_name) {nil}
94
+ end
95
+ end
96
+ return row
45
97
  end
46
98
 
47
99
  end
@@ -1,5 +1,5 @@
1
1
  class CSV
2
- class AutoParser
3
- VERSION = "0.1.0"
2
+ class AutoParser < CSV
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
@@ -4,29 +4,34 @@ require 'minitest_helper'
4
4
  describe CSV::AutoParser do
5
5
 
6
6
  it "it can parse a csv automatically via csv line number id" do
7
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|csv_line_number, header_row| csv_line_number == 1 }
8
- csv.rows.length.must_equal 5
9
- csv.rows.first.full_name.must_equal "bob"
7
+ table = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|csv_line_number, header_row| csv_line_number == 1 }.read
8
+ table.class.must_equal CSV::Table
9
+ table.length.must_equal 5
10
+ table.first["full-name"].must_equal "bob"
11
+ table.first.full_name.must_equal "bob"
10
12
  end
11
13
 
12
14
  it "it can parse a csv automatically via header row id" do
13
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
15
+ table = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol) do |line_num, header_row|
14
16
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
15
- end
16
- csv.rows.length.must_equal 3
17
- csv.rows.first.name.must_equal "Jon Smith"
18
- csv.rows.first.job_title.must_equal "blacksmith"
19
- csv.rows.last.age.to_i.must_equal 29
17
+ end.read
18
+ table.length.must_equal 3
19
+ table.first.name.must_equal "Jon Smith"
20
+ table.first["Job title"].must_be_nil
21
+ table.first[:job_title].must_equal "blacksmith"
22
+ table.first.job_title.must_equal "blacksmith"
23
+ table[-1].age.to_i.must_equal 29
20
24
  end
21
25
 
22
26
  it "it will give you the rows found before the header row" do
23
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
27
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
24
28
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
25
29
  end
26
- csv.pre_header_rows.first.last.must_equal "years of age"
27
- csv.pre_header_rows.last.first.must_equal "bob"
28
- File.basename(csv.pre_header_rows.last.csv_file).must_equal "persons.csv"
29
- csv.pre_header_rows.last.csv_line.must_equal 2
30
+ parser.header_line_number.must_equal 3
31
+ parser.pre_header_rows.first.last.must_equal "years of age"
32
+ parser.pre_header_rows.last.first.must_equal "bob"
33
+ File.basename(parser.pre_header_rows.last.file).must_equal "persons.csv"
34
+ parser.pre_header_rows.last.line.must_equal 2
30
35
  end
31
36
 
32
37
  it "will raise an exception if it can't find the header row" do
@@ -35,24 +40,79 @@ describe CSV::AutoParser do
35
40
  end
36
41
 
37
42
  it "will not confuse column information with another csv which is parsed simultaneously" do
38
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
43
+ table = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
39
44
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
40
- end
41
- csv2 = CSV::AutoParser.new(fixture_file_path('persons2.csv')) do |line_num, header_row|
45
+ end.read
46
+ table2 = CSV::AutoParser.new(fixture_file_path('persons2.csv')) do |line_num, header_row|
47
+ ["name", "Job title"].all? {|cell| header_row.include?(cell) }
48
+ end.read
49
+ table.length.must_equal 3
50
+ table.first.name.must_equal "Jon Smith"
51
+ table.first.job_title.must_equal "blacksmith"
52
+ table[-1].age.to_i.must_equal 29
53
+ table[1].job_title.must_equal "farmer"
54
+
55
+ table2.length.must_equal 2
56
+ table2.first.name.must_equal "Kermy Frog"
57
+ table2.first.job_title.must_equal "frog"
58
+ table2[-1].age.to_i.must_equal 19
59
+ end
60
+
61
+ it "will define methods which return nil for optional columns not present in CSV" do
62
+ table1 = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
63
+ ["name", "Job title"].all? {|cell| header_row.include?(cell) }
64
+ end.read
65
+ lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
66
+ table2 = CSV::AutoParser.new(fixture_file_path('persons.csv'), optional_headers: [:my_optional_header, "Zip Code", :name]) do |line_num, header_row|
42
67
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
68
+ end.read
69
+ table2.first.my_optional_header.must_be_nil
70
+ lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
71
+ table2.first.name.must_equal "Jon Smith"
72
+ lambda { table2.first.my_mandatory_header }.must_raise(NoMethodError)
73
+ table2.first.zip_code.must_be_nil
74
+ table2[-1].my_optional_header.must_be_nil
75
+ end
76
+
77
+ it "will pass along CSV.new options" do
78
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|l, hr| l == 1 }
79
+ parser.field_size_limit.must_be_nil
80
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), field_size_limit: 100) {|l, hr| l == 1 }
81
+ parser.field_size_limit.must_equal 100
82
+ end
83
+
84
+ it "should work with a CSV string or an IO object too" do
85
+ input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
86
+ input_objects.each do |obj|
87
+ table = CSV::AutoParser.new(obj) do |line_num, header_row|
88
+ ["name", "Job title"].all? {|cell| header_row.include?(cell) }
89
+ end.read
90
+ table.length.must_equal 3
91
+ table.first.name.must_equal "Jon Smith"
92
+ table.first.job_title.must_equal "blacksmith"
93
+ table[-1].age.to_i.must_equal 29
94
+ end
95
+ end
96
+
97
+ it "should work just like CSV.new when not passed a block except that it can now take a file path as data too" do
98
+ input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
99
+ input_objects.each do |obj|
100
+ parser = CSV::AutoParser.new(obj, header_converters: :symbol, headers: :first_row)
101
+ parser.header_line_number.must_equal 1
102
+ parser.pre_header_rows.must_be_empty
103
+ table = parser.read
104
+ table.length.must_equal 5
105
+ table.first[:fullname].must_equal "bob"
106
+ # method names are based off of converted header names!
107
+ table.first.fullname.must_equal "bob"
43
108
  end
44
- csv.rows.length.must_equal 3
45
- csv.rows.first.name.must_equal "Jon Smith"
46
- csv.rows.first.job_title.must_equal "blacksmith"
47
- csv.rows.last.age.to_i.must_equal 29
48
- csv.rows[1].job_title.must_equal "farmer"
49
- csv.rows[1].csv_line.must_equal 5
50
-
51
- csv2.rows.length.must_equal 2
52
- csv2.rows.first.name.must_equal "Kermy Frog"
53
- csv2.rows.first.job_title.must_equal "frog"
54
- csv2.rows.last.age.to_i.must_equal 19
55
- File.basename(csv2.rows.first.csv_file).must_equal "persons2.csv"
109
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol, headers: "first_col,second_col,third_col,fourth_col")
110
+ parser.header_line_number.must_equal nil
111
+ parser.pre_header_rows.must_be_empty
112
+ table = parser.read
113
+ table.length.must_equal 6
114
+ table[0].first_col.must_equal "full-name"
115
+ table[1].first_col.must_equal "bob"
56
116
  end
57
117
 
58
118
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-autoparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Delsol
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-27 00:00:00.000000000 Z
11
+ date: 2014-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -49,6 +49,7 @@ files:
49
49
  - README.md
50
50
  - Rakefile
51
51
  - csv-autoparser.gemspec
52
+ - examples/basic.rb
52
53
  - lib/csv/autoparser.rb
53
54
  - lib/csv/autoparser/version.rb
54
55
  - test/fixtures/persons.csv