csv-autoparser 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b806b5d4df47de7532a01384fa357b998e53653a
4
- data.tar.gz: 1ad970d72389cb2a061127d69c584ed8be1e0244
3
+ metadata.gz: 202d8f86d22651aeecb94ab9b5a839c1df8d0117
4
+ data.tar.gz: 06df0597fc5d4bd6df4f9f7da5ded7bdda5542c2
5
5
  SHA512:
6
- metadata.gz: 9263abf623d4ffb74ad65ba9243f9fa36e9924ba7787f7c9619bbbf866a3843764ee2f0a1476af6331c16172eed75f948340562c8750ca37b47260b803594b5a
7
- data.tar.gz: 07d207dc0bd482d688d721a714bb4caa8527925f521099f47dfe6bb0cf485645811c6d598a572e24eb86f222868909e0246fcf2040d42d9e5f6f63708f3afedc
6
+ metadata.gz: 409f3c4d1505698a2e248309d061a0c77c98c965081361a058756a215eb5eb3601b5385604065a231482dde5b55af48e4cf6b58d9a4200926c42c1ced7521a90
7
+ data.tar.gz: 9fc72adf4b37cd1b86fbe62e0f47e73731aa7c70963192bc4e6cf4f5afef6feeb71c9673a98c4f0a20a2880d376459f24abaf5a7f6239a9a99a36c310b0924f1
data/README.md CHANGED
@@ -1,6 +1,12 @@
1
1
  # CSV::AutoParser
2
2
 
3
- CSV::AutoParser automatically parses a CSV file given a user specified header row.
3
+ CSV::AutoParser automatically parses a CSV file given a user specified header row and
4
+ adds method style accessors to the CSV::Row data.
5
+
6
+ All functionality of the standard Ruby CSV class is accessible since CSV::AutoParser
7
+ inherits from CSV. CSV::AutoParser objects behave just like CSV objects when not
8
+ provided a block (though method style accessors will still be available on CSV::Row
9
+ objects).
4
10
 
5
11
  ## Installation
6
12
 
@@ -8,18 +14,36 @@ CSV::AutoParser automatically parses a CSV file given a user specified header ro
8
14
 
9
15
  ## Usage
10
16
 
11
- require 'csv/autoparser'
12
-
13
- # ID header row by CSV line number.
14
- csv = CSV::AutoParser.new("my_file.csv") {|csv_line_number, header_row| csv_line_number == 1 }
15
- csv.rows.each {|row| puts row.full_name }
16
-
17
- # -OR- ID header row by column header names.
18
- csv = CSV::AutoParser.new(input_file) do |line_num, header_row|
19
- ["name", "Job title"].all? {|cell| header_row.include?(cell) }
20
- end
21
- puts csv.rows.first.name # => "Jon Smith"
22
- csv.rows.first.job_title # => "blacksmith"
17
+ ```ruby
18
+ require 'csv/autoparser'
19
+
20
+ data = <<CSV
21
+ "this is",not,"the header"
22
+ "the real header","is easy","to find"
23
+ name,"Job title",age
24
+ "Jon Smith",blacksmith,55
25
+ "Jimmy Johnson",farmer,34
26
+ "Kimmy Kimmson","pig wrangler",29
27
+ CSV
28
+
29
+ # ID header row by CSV line number.
30
+ csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
31
+ csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
32
+ # Jon Smith is a 55 year old blacksmith.
33
+ # Jimmy Johnson is a 34 year old farmer.
34
+ # Kimmy Kimmson is a 29 year old pig wrangler.
35
+
36
+ # -OR- ID header row by column header names.
37
+ csv = CSV::AutoParser.new(data) do |line_num, header_row|
38
+ ["name", "Job title"].all? {|field| header_row.include?(field) }
39
+ end
40
+ csv.is_a?(CSV) # => true
41
+ table = csv.read # => CSV::Table
42
+ table.first.name # => "Jon Smith"
43
+ table[-1].job_title # => "pig wrangler"
44
+ ```
45
+
46
+ More usage examples can be seen in examples/.
23
47
 
24
48
  ## Contributing
25
49
 
data/examples/basic.rb ADDED
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
3
+
4
+ require 'csv/autoparser'
5
+
6
+ data = <<CSV
7
+ "this is",not,"the header"
8
+ "the real header","is easy","to find"
9
+ name,"Job title",age
10
+ "Jon Smith",blacksmith,55
11
+ "Jimmy Johnson",farmer,34
12
+ "Kimmy Kimmson","pig wrangler",29
13
+ CSV
14
+
15
+ ########################################
16
+ # Identifying the Header Row
17
+ ########################################
18
+
19
+ # ID header row by CSV line number.
20
+ csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
21
+ csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
22
+ # Jon Smith is a 55 year old blacksmith.
23
+ # Jimmy Johnson is a 34 year old farmer.
24
+ # Kimmy Kimmson is a 29 year old pig wrangler.
25
+
26
+ # -OR- ID header row by column header names.
27
+ csv = CSV::AutoParser.new(data) do |line_num, header_row|
28
+ ["name", "Job title"].all? {|field| header_row.include?(field) }
29
+ end
30
+ csv.is_a?(CSV) # => true
31
+ table = csv.read # => CSV::Table
32
+ table.first.name # => "Jon Smith"
33
+ table[-1].job_title # => "pig wrangler"
34
+
35
+
36
+ ################################################
37
+ # Want to get at the data above the header row?
38
+ ################################################
39
+
40
+ csv = CSV::AutoParser.new(File.expand_path("../../test/fixtures/persons.csv", __FILE__)) {|lineno, hr| lineno == 3 }
41
+ csv.pre_header_rows.each do |row|
42
+ puts "#{row.file}(#{row.line}): #{row.inspect}"
43
+ end
44
+
45
+
46
+ ########################################
47
+ # Optional Header Columns
48
+ ########################################
49
+
50
+ data2 = <<CSV
51
+ name,"city name","Job title"
52
+ "Jon Smith",Sacramento,blacksmith
53
+ "Jimmy Johnson","San Diego",farmer
54
+ "Kimmy Kimmson",Austin,"pig wrangler"
55
+ CSV
56
+
57
+ # When CSV columns are optional, specify it through options so that a NoMethodError is not raised.
58
+ def demo_optional_header_columns data
59
+ csv = CSV::AutoParser.new(data, optional_headers: ["City Name", :age]) {|lineno, hr| hr.include? "name" }
60
+ csv.each do |row|
61
+ # row.birth_date # => raises NoMethodError
62
+ if row.city_name
63
+ puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title} living in #{row.city_name}."
64
+ else
65
+ puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title}."
66
+ end
67
+ end
68
+ end
69
+
70
+ demo_optional_header_columns(data)
71
+ demo_optional_header_columns(data2)
72
+
@@ -2,46 +2,98 @@ require "csv"
2
2
  require "csv/autoparser/version"
3
3
 
4
4
  class CSV
5
- class AutoParser
6
5
 
7
- class Row < Array
8
- attr_reader :csv_file, :csv_line
6
+ class Row
7
+ alias_method :orig_initialize, :initialize
8
+ # Defines method style accessors based on header row names.
9
+ def initialize(*args)
10
+ orig_initialize(*args)
11
+ if field_row?
12
+ headers.each do |h|
13
+ define_singleton_method(CSV::AutoParser.convert_header_to_method_name(h)) { fetch(h) }
14
+ end
15
+ end
16
+ end
17
+ end
18
+
19
+ class AutoParser < CSV
20
+
21
+ # This is the method called by AutoParser to turn header names into legal method names.
22
+ # Redefine as necessary.
23
+ def self.convert_header_to_method_name header
24
+ header.to_s.downcase.strip.gsub(/\s+/, '_').gsub(/-+/, '_').gsub(/[^\w]/, '').to_sym
25
+ end
26
+
27
+ # The rows found before the header row are paired with file and line information. These
28
+ # objects are available through CSV::AutoParser#pre_header_rows.
29
+ class PreHeaderRow < Array
30
+ attr_reader :file, :line
9
31
  def self.create original_row, file, line
10
- row = Row.new(original_row)
11
- row.instance_eval { @csv_file = file; @csv_line = line }
32
+ row = PreHeaderRow.new(original_row)
33
+ row.instance_eval { @file = file; @line = line }
12
34
  return row
13
35
  end
14
36
  end
15
37
 
16
38
  class HeaderRowNotFound < RuntimeError; end
17
39
 
18
- attr_reader :pre_header_rows, :rows
40
+ attr_reader :pre_header_rows, :header_line_number
19
41
 
20
- def initialize file, &is_header
21
- map = {}
22
- csv_line_number = 0
23
- @rows = []
42
+ # +data+ can be the path of a CSV file, in addition to a CSV String or an IO object as accepted by CSV.new.
43
+ # All CSV.new options are supported via +opts+. If an +&is_header+ block is provided, it
44
+ # takes precedence over the CSV.new +:headers+ option. A +:optional_headers+ option has
45
+ # been added for specifying headers that may not be present in the CSV, but you do not want
46
+ # a NoMethodError to raise when accessing a field using the header method style accessor.
47
+ def initialize data, opts={}, &is_header
48
+ @header_line_number = nil
24
49
  @pre_header_rows = []
25
- CSV.foreach(file) do |row|
26
- csv_line_number += 1
27
- if map.empty?
50
+ @optional_headers = [opts.delete(:optional_headers)].flatten.compact
51
+ if data.is_a?(String) and File.exists?(data)
52
+ file = data
53
+ data = File.open(data)
54
+ end
55
+ if block_given?
56
+ data_io = if data.is_a?(IO)
57
+ data
58
+ elsif data.is_a?(String)
59
+ StringIO.new(data)
60
+ else
61
+ raise ArgumentError, "data must be a path to a CSV file, a CSV formatted String, or an IO object."
62
+ end
63
+ header_pos = data_io.pos
64
+ csv_line_number = 0
65
+ header_finder = CSV.new(data_io, opts.merge(:headers => false)).each do |row|
66
+ csv_line_number += 1
28
67
  if is_header.call(csv_line_number, row)
29
- row.each_index {|index| map[row[index]] = index }
68
+ @header_line_number = csv_line_number
69
+ break
30
70
  else
31
- @pre_header_rows << Row::create(row, file, csv_line_number)
32
- end
33
- else
34
- @rows << Row::create(row, file, csv_line_number)
35
- map.each_pair do |column_name, column_offset|
36
- @rows.last.define_singleton_method(column_to_method_name(column_name)) { self[column_offset] }
71
+ @pre_header_rows << CSV::AutoParser::PreHeaderRow.create(row, file, csv_line_number)
37
72
  end
73
+ header_pos = data_io.pos
38
74
  end
75
+ raise HeaderRowNotFound, "Could not find header row#{file ? " in #{file}" : "" }." if @header_line_number.nil?
76
+ data_io.seek header_pos
77
+ data_io = StringIO.new(data_io.read)
78
+ super(data_io, opts.merge(:headers => true))
79
+ else
80
+ @header_line_number = 1 if opts[:headers] == :first_row or opts[:headers] == true
81
+ super(data, opts)
39
82
  end
40
- raise HeaderRowNotFound, "Could not find header row in #{file}." if map.empty?
41
83
  end
42
84
 
43
- def column_to_method_name name
44
- name.downcase.strip.gsub(/\s+/, '_').gsub(/-+/, '_').gsub(/[^\w]/, '')
85
+ alias_method :orig_shift, :shift
86
+
87
+ # Overridden to add methods for optional headers which were not present in the CSV.
88
+ def shift
89
+ row = orig_shift
90
+ [@optional_headers].flatten.compact.each do |h|
91
+ method_name = self.class.convert_header_to_method_name(h)
92
+ unless row.respond_to? method_name
93
+ row.define_singleton_method(method_name) {nil}
94
+ end
95
+ end
96
+ return row
45
97
  end
46
98
 
47
99
  end
@@ -1,5 +1,5 @@
1
1
  class CSV
2
- class AutoParser
3
- VERSION = "0.1.0"
2
+ class AutoParser < CSV
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
@@ -4,29 +4,34 @@ require 'minitest_helper'
4
4
  describe CSV::AutoParser do
5
5
 
6
6
  it "it can parse a csv automatically via csv line number id" do
7
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|csv_line_number, header_row| csv_line_number == 1 }
8
- csv.rows.length.must_equal 5
9
- csv.rows.first.full_name.must_equal "bob"
7
+ table = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|csv_line_number, header_row| csv_line_number == 1 }.read
8
+ table.class.must_equal CSV::Table
9
+ table.length.must_equal 5
10
+ table.first["full-name"].must_equal "bob"
11
+ table.first.full_name.must_equal "bob"
10
12
  end
11
13
 
12
14
  it "it can parse a csv automatically via header row id" do
13
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
15
+ table = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol) do |line_num, header_row|
14
16
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
15
- end
16
- csv.rows.length.must_equal 3
17
- csv.rows.first.name.must_equal "Jon Smith"
18
- csv.rows.first.job_title.must_equal "blacksmith"
19
- csv.rows.last.age.to_i.must_equal 29
17
+ end.read
18
+ table.length.must_equal 3
19
+ table.first.name.must_equal "Jon Smith"
20
+ table.first["Job title"].must_be_nil
21
+ table.first[:job_title].must_equal "blacksmith"
22
+ table.first.job_title.must_equal "blacksmith"
23
+ table[-1].age.to_i.must_equal 29
20
24
  end
21
25
 
22
26
  it "it will give you the rows found before the header row" do
23
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
27
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
24
28
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
25
29
  end
26
- csv.pre_header_rows.first.last.must_equal "years of age"
27
- csv.pre_header_rows.last.first.must_equal "bob"
28
- File.basename(csv.pre_header_rows.last.csv_file).must_equal "persons.csv"
29
- csv.pre_header_rows.last.csv_line.must_equal 2
30
+ parser.header_line_number.must_equal 3
31
+ parser.pre_header_rows.first.last.must_equal "years of age"
32
+ parser.pre_header_rows.last.first.must_equal "bob"
33
+ File.basename(parser.pre_header_rows.last.file).must_equal "persons.csv"
34
+ parser.pre_header_rows.last.line.must_equal 2
30
35
  end
31
36
 
32
37
  it "will raise an exception if it can't find the header row" do
@@ -35,24 +40,79 @@ describe CSV::AutoParser do
35
40
  end
36
41
 
37
42
  it "will not confuse column information with another csv which is parsed simultaneously" do
38
- csv = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
43
+ table = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
39
44
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
40
- end
41
- csv2 = CSV::AutoParser.new(fixture_file_path('persons2.csv')) do |line_num, header_row|
45
+ end.read
46
+ table2 = CSV::AutoParser.new(fixture_file_path('persons2.csv')) do |line_num, header_row|
47
+ ["name", "Job title"].all? {|cell| header_row.include?(cell) }
48
+ end.read
49
+ table.length.must_equal 3
50
+ table.first.name.must_equal "Jon Smith"
51
+ table.first.job_title.must_equal "blacksmith"
52
+ table[-1].age.to_i.must_equal 29
53
+ table[1].job_title.must_equal "farmer"
54
+
55
+ table2.length.must_equal 2
56
+ table2.first.name.must_equal "Kermy Frog"
57
+ table2.first.job_title.must_equal "frog"
58
+ table2[-1].age.to_i.must_equal 19
59
+ end
60
+
61
+ it "will define methods which return nil for optional columns not present in CSV" do
62
+ table1 = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
63
+ ["name", "Job title"].all? {|cell| header_row.include?(cell) }
64
+ end.read
65
+ lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
66
+ table2 = CSV::AutoParser.new(fixture_file_path('persons.csv'), optional_headers: [:my_optional_header, "Zip Code", :name]) do |line_num, header_row|
42
67
  ["name", "Job title"].all? {|cell| header_row.include?(cell) }
68
+ end.read
69
+ table2.first.my_optional_header.must_be_nil
70
+ lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
71
+ table2.first.name.must_equal "Jon Smith"
72
+ lambda { table2.first.my_mandatory_header }.must_raise(NoMethodError)
73
+ table2.first.zip_code.must_be_nil
74
+ table2[-1].my_optional_header.must_be_nil
75
+ end
76
+
77
+ it "will pass along CSV.new options" do
78
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|l, hr| l == 1 }
79
+ parser.field_size_limit.must_be_nil
80
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), field_size_limit: 100) {|l, hr| l == 1 }
81
+ parser.field_size_limit.must_equal 100
82
+ end
83
+
84
+ it "should work with a CSV string or an IO object too" do
85
+ input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
86
+ input_objects.each do |obj|
87
+ table = CSV::AutoParser.new(obj) do |line_num, header_row|
88
+ ["name", "Job title"].all? {|cell| header_row.include?(cell) }
89
+ end.read
90
+ table.length.must_equal 3
91
+ table.first.name.must_equal "Jon Smith"
92
+ table.first.job_title.must_equal "blacksmith"
93
+ table[-1].age.to_i.must_equal 29
94
+ end
95
+ end
96
+
97
+ it "should work just like CSV.new when not passed a block except that it can now take a file path as data too" do
98
+ input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
99
+ input_objects.each do |obj|
100
+ parser = CSV::AutoParser.new(obj, header_converters: :symbol, headers: :first_row)
101
+ parser.header_line_number.must_equal 1
102
+ parser.pre_header_rows.must_be_empty
103
+ table = parser.read
104
+ table.length.must_equal 5
105
+ table.first[:fullname].must_equal "bob"
106
+ # method names are based off of converted header names!
107
+ table.first.fullname.must_equal "bob"
43
108
  end
44
- csv.rows.length.must_equal 3
45
- csv.rows.first.name.must_equal "Jon Smith"
46
- csv.rows.first.job_title.must_equal "blacksmith"
47
- csv.rows.last.age.to_i.must_equal 29
48
- csv.rows[1].job_title.must_equal "farmer"
49
- csv.rows[1].csv_line.must_equal 5
50
-
51
- csv2.rows.length.must_equal 2
52
- csv2.rows.first.name.must_equal "Kermy Frog"
53
- csv2.rows.first.job_title.must_equal "frog"
54
- csv2.rows.last.age.to_i.must_equal 19
55
- File.basename(csv2.rows.first.csv_file).must_equal "persons2.csv"
109
+ parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol, headers: "first_col,second_col,third_col,fourth_col")
110
+ parser.header_line_number.must_equal nil
111
+ parser.pre_header_rows.must_be_empty
112
+ table = parser.read
113
+ table.length.must_equal 6
114
+ table[0].first_col.must_equal "full-name"
115
+ table[1].first_col.must_equal "bob"
56
116
  end
57
117
 
58
118
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-autoparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Delsol
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-27 00:00:00.000000000 Z
11
+ date: 2014-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -49,6 +49,7 @@ files:
49
49
  - README.md
50
50
  - Rakefile
51
51
  - csv-autoparser.gemspec
52
+ - examples/basic.rb
52
53
  - lib/csv/autoparser.rb
53
54
  - lib/csv/autoparser/version.rb
54
55
  - test/fixtures/persons.csv