csv-autoparser 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +37 -13
- data/examples/basic.rb +72 -0
- data/lib/csv/autoparser.rb +75 -23
- data/lib/csv/autoparser/version.rb +2 -2
- data/test/test_csv/autoparser.rb +89 -29
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 202d8f86d22651aeecb94ab9b5a839c1df8d0117
|
4
|
+
data.tar.gz: 06df0597fc5d4bd6df4f9f7da5ded7bdda5542c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 409f3c4d1505698a2e248309d061a0c77c98c965081361a058756a215eb5eb3601b5385604065a231482dde5b55af48e4cf6b58d9a4200926c42c1ced7521a90
|
7
|
+
data.tar.gz: 9fc72adf4b37cd1b86fbe62e0f47e73731aa7c70963192bc4e6cf4f5afef6feeb71c9673a98c4f0a20a2880d376459f24abaf5a7f6239a9a99a36c310b0924f1
|
data/README.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
# CSV::AutoParser
|
2
2
|
|
3
|
-
CSV::AutoParser automatically parses a CSV file given a user specified header row
|
3
|
+
CSV::AutoParser automatically parses a CSV file given a user specified header row and
|
4
|
+
adds method style accessors to the CSV::Row data.
|
5
|
+
|
6
|
+
All functionality of the standard Ruby CSV class is accessible since CSV::AutoParser
|
7
|
+
inherits from CSV. CSV::AutoParser objects behave just like CSV objects when not
|
8
|
+
provided a block (though method style accessors will still be available on CSV::Row
|
9
|
+
objects).
|
4
10
|
|
5
11
|
## Installation
|
6
12
|
|
@@ -8,18 +14,36 @@ CSV::AutoParser automatically parses a CSV file given a user specified header ro
|
|
8
14
|
|
9
15
|
## Usage
|
10
16
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
```ruby
|
18
|
+
require 'csv/autoparser'
|
19
|
+
|
20
|
+
data = <<CSV
|
21
|
+
"this is",not,"the header"
|
22
|
+
"the real header","is easy","to find"
|
23
|
+
name,"Job title",age
|
24
|
+
"Jon Smith",blacksmith,55
|
25
|
+
"Jimmy Johnson",farmer,34
|
26
|
+
"Kimmy Kimmson","pig wrangler",29
|
27
|
+
CSV
|
28
|
+
|
29
|
+
# ID header row by CSV line number.
|
30
|
+
csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
|
31
|
+
csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
|
32
|
+
# Jon Smith is a 55 year old blacksmith.
|
33
|
+
# Jimmy Johnson is a 34 year old farmer.
|
34
|
+
# Kimmy Kimmson is a 29 year old pig wrangler.
|
35
|
+
|
36
|
+
# -OR- ID header row by column header names.
|
37
|
+
csv = CSV::AutoParser.new(data) do |line_num, header_row|
|
38
|
+
["name", "Job title"].all? {|field| header_row.include?(field) }
|
39
|
+
end
|
40
|
+
csv.is_a?(CSV) # => true
|
41
|
+
table = csv.read # => CSV::Table
|
42
|
+
table.first.name # => "Jon Smith"
|
43
|
+
table[-1].job_title # => "pig wrangler"
|
44
|
+
```
|
45
|
+
|
46
|
+
More usage examples can be seen in examples/.
|
23
47
|
|
24
48
|
## Contributing
|
25
49
|
|
data/examples/basic.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
3
|
+
|
4
|
+
require 'csv/autoparser'
|
5
|
+
|
6
|
+
data = <<CSV
|
7
|
+
"this is",not,"the header"
|
8
|
+
"the real header","is easy","to find"
|
9
|
+
name,"Job title",age
|
10
|
+
"Jon Smith",blacksmith,55
|
11
|
+
"Jimmy Johnson",farmer,34
|
12
|
+
"Kimmy Kimmson","pig wrangler",29
|
13
|
+
CSV
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Identifying the Header Row
|
17
|
+
########################################
|
18
|
+
|
19
|
+
# ID header row by CSV line number.
|
20
|
+
csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
|
21
|
+
csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
|
22
|
+
# Jon Smith is a 55 year old blacksmith.
|
23
|
+
# Jimmy Johnson is a 34 year old farmer.
|
24
|
+
# Kimmy Kimmson is a 29 year old pig wrangler.
|
25
|
+
|
26
|
+
# -OR- ID header row by column header names.
|
27
|
+
csv = CSV::AutoParser.new(data) do |line_num, header_row|
|
28
|
+
["name", "Job title"].all? {|field| header_row.include?(field) }
|
29
|
+
end
|
30
|
+
csv.is_a?(CSV) # => true
|
31
|
+
table = csv.read # => CSV::Table
|
32
|
+
table.first.name # => "Jon Smith"
|
33
|
+
table[-1].job_title # => "pig wrangler"
|
34
|
+
|
35
|
+
|
36
|
+
################################################
|
37
|
+
# Want to get at the data above the header row?
|
38
|
+
################################################
|
39
|
+
|
40
|
+
csv = CSV::AutoParser.new(File.expand_path("../../test/fixtures/persons.csv", __FILE__)) {|lineno, hr| lineno == 3 }
|
41
|
+
csv.pre_header_rows.each do |row|
|
42
|
+
puts "#{row.file}(#{row.line}): #{row.inspect}"
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
########################################
|
47
|
+
# Optional Header Columns
|
48
|
+
########################################
|
49
|
+
|
50
|
+
data2 = <<CSV
|
51
|
+
name,"city name","Job title"
|
52
|
+
"Jon Smith",Sacramento,blacksmith
|
53
|
+
"Jimmy Johnson","San Diego",farmer
|
54
|
+
"Kimmy Kimmson",Austin,"pig wrangler"
|
55
|
+
CSV
|
56
|
+
|
57
|
+
# When CSV columns are optional, specify it through options so that a NoMethodError is not raised.
|
58
|
+
def demo_optional_header_columns data
|
59
|
+
csv = CSV::AutoParser.new(data, optional_headers: ["City Name", :age]) {|lineno, hr| hr.include? "name" }
|
60
|
+
csv.each do |row|
|
61
|
+
# row.birth_date # => raises NoMethodError
|
62
|
+
if row.city_name
|
63
|
+
puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title} living in #{row.city_name}."
|
64
|
+
else
|
65
|
+
puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title}."
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
demo_optional_header_columns(data)
|
71
|
+
demo_optional_header_columns(data2)
|
72
|
+
|
data/lib/csv/autoparser.rb
CHANGED
@@ -2,46 +2,98 @@ require "csv"
|
|
2
2
|
require "csv/autoparser/version"
|
3
3
|
|
4
4
|
class CSV
|
5
|
-
class AutoParser
|
6
5
|
|
7
|
-
|
8
|
-
|
6
|
+
class Row
|
7
|
+
alias_method :orig_initialize, :initialize
|
8
|
+
# Defines method style accessors based on header row names.
|
9
|
+
def initialize(*args)
|
10
|
+
orig_initialize(*args)
|
11
|
+
if field_row?
|
12
|
+
headers.each do |h|
|
13
|
+
define_singleton_method(CSV::AutoParser.convert_header_to_method_name(h)) { fetch(h) }
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class AutoParser < CSV
|
20
|
+
|
21
|
+
# This is the method called by AutoParser to turn header names into legal method names.
|
22
|
+
# Redefine as necessary.
|
23
|
+
def self.convert_header_to_method_name header
|
24
|
+
header.to_s.downcase.strip.gsub(/\s+/, '_').gsub(/-+/, '_').gsub(/[^\w]/, '').to_sym
|
25
|
+
end
|
26
|
+
|
27
|
+
# The rows found before the header row are paired with file and line information. These
|
28
|
+
# objects are available through CSV::AutoParser#pre_header_rows.
|
29
|
+
class PreHeaderRow < Array
|
30
|
+
attr_reader :file, :line
|
9
31
|
def self.create original_row, file, line
|
10
|
-
row =
|
11
|
-
row.instance_eval { @
|
32
|
+
row = PreHeaderRow.new(original_row)
|
33
|
+
row.instance_eval { @file = file; @line = line }
|
12
34
|
return row
|
13
35
|
end
|
14
36
|
end
|
15
37
|
|
16
38
|
class HeaderRowNotFound < RuntimeError; end
|
17
39
|
|
18
|
-
attr_reader :pre_header_rows, :
|
40
|
+
attr_reader :pre_header_rows, :header_line_number
|
19
41
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
42
|
+
# +data+ can be the path of a CSV file, in addition to a CSV String or an IO object as with CSV.new.
|
43
|
+
# All CSV.new options are supported via +opts+. If an +&is_header+ block is provided, it
|
44
|
+
# takes precedence over the CSV.new +:headers+ option. A +:optional_headers+ option has
|
45
|
+
# been added for specifying headers that may not be present in the CSV, but you do not want
|
46
|
+
# a NoMethodError to raise when accessing a field using the header method style accessor.
|
47
|
+
def initialize data, opts={}, &is_header
|
48
|
+
@header_line_number = nil
|
24
49
|
@pre_header_rows = []
|
25
|
-
|
26
|
-
|
27
|
-
|
50
|
+
@optional_headers = [opts.delete(:optional_headers)].flatten.compact
|
51
|
+
if data.is_a?(String) and File.exists?(data)
|
52
|
+
file = data
|
53
|
+
data = File.open(data)
|
54
|
+
end
|
55
|
+
if block_given?
|
56
|
+
data_io = if data.is_a?(IO)
|
57
|
+
data
|
58
|
+
elsif data.is_a?(String)
|
59
|
+
StringIO.new(data)
|
60
|
+
else
|
61
|
+
raise ArgumentError, "data must be a path to a CSV file, a CSV formatted String, or an IO object."
|
62
|
+
end
|
63
|
+
header_pos = data_io.pos
|
64
|
+
csv_line_number = 0
|
65
|
+
header_finder = CSV.new(data_io, opts.merge(:headers => false)).each do |row|
|
66
|
+
csv_line_number += 1
|
28
67
|
if is_header.call(csv_line_number, row)
|
29
|
-
|
68
|
+
@header_line_number = csv_line_number
|
69
|
+
break
|
30
70
|
else
|
31
|
-
@pre_header_rows <<
|
32
|
-
end
|
33
|
-
else
|
34
|
-
@rows << Row::create(row, file, csv_line_number)
|
35
|
-
map.each_pair do |column_name, column_offset|
|
36
|
-
@rows.last.define_singleton_method(column_to_method_name(column_name)) { self[column_offset] }
|
71
|
+
@pre_header_rows << CSV::AutoParser::PreHeaderRow.create(row, file, csv_line_number)
|
37
72
|
end
|
73
|
+
header_pos = data_io.pos
|
38
74
|
end
|
75
|
+
raise HeaderRowNotFound, "Could not find header row#{file ? " in #{file}" : "" }." if @header_line_number.nil?
|
76
|
+
data_io.seek header_pos
|
77
|
+
data_io = StringIO.new(data_io.read)
|
78
|
+
super(data_io, opts.merge(:headers => true))
|
79
|
+
else
|
80
|
+
@header_line_number = 1 if opts[:headers] == :first_row or opts[:headers] == true
|
81
|
+
super(data, opts)
|
39
82
|
end
|
40
|
-
raise HeaderRowNotFound, "Could not find header row in #{file}." if map.empty?
|
41
83
|
end
|
42
84
|
|
43
|
-
|
44
|
-
|
85
|
+
alias_method :orig_shift, :shift
|
86
|
+
|
87
|
+
# Overridden to add methods for optional headers which were not present in the CSV.
|
88
|
+
def shift
|
89
|
+
row = orig_shift
|
90
|
+
[@optional_headers].flatten.compact.each do |h|
|
91
|
+
method_name = self.class.convert_header_to_method_name(h)
|
92
|
+
unless row.respond_to? method_name
|
93
|
+
row.define_singleton_method(method_name) {nil}
|
94
|
+
end
|
95
|
+
end
|
96
|
+
return row
|
45
97
|
end
|
46
98
|
|
47
99
|
end
|
data/test/test_csv/autoparser.rb
CHANGED
@@ -4,29 +4,34 @@ require 'minitest_helper'
|
|
4
4
|
describe CSV::AutoParser do
|
5
5
|
|
6
6
|
it "it can parse a csv automatically via csv line number id" do
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
table = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|csv_line_number, header_row| csv_line_number == 1 }.read
|
8
|
+
table.class.must_equal CSV::Table
|
9
|
+
table.length.must_equal 5
|
10
|
+
table.first["full-name"].must_equal "bob"
|
11
|
+
table.first.full_name.must_equal "bob"
|
10
12
|
end
|
11
13
|
|
12
14
|
it "it can parse a csv automatically via header row id" do
|
13
|
-
|
15
|
+
table = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol) do |line_num, header_row|
|
14
16
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
end.read
|
18
|
+
table.length.must_equal 3
|
19
|
+
table.first.name.must_equal "Jon Smith"
|
20
|
+
table.first["Job title"].must_be_nil
|
21
|
+
table.first[:job_title].must_equal "blacksmith"
|
22
|
+
table.first.job_title.must_equal "blacksmith"
|
23
|
+
table[-1].age.to_i.must_equal 29
|
20
24
|
end
|
21
25
|
|
22
26
|
it "it will give you the rows found before the header row" do
|
23
|
-
|
27
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
|
24
28
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
25
29
|
end
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
+
parser.header_line_number.must_equal 3
|
31
|
+
parser.pre_header_rows.first.last.must_equal "years of age"
|
32
|
+
parser.pre_header_rows.last.first.must_equal "bob"
|
33
|
+
File.basename(parser.pre_header_rows.last.file).must_equal "persons.csv"
|
34
|
+
parser.pre_header_rows.last.line.must_equal 2
|
30
35
|
end
|
31
36
|
|
32
37
|
it "will raise an exception if it can't find the header row" do
|
@@ -35,24 +40,79 @@ describe CSV::AutoParser do
|
|
35
40
|
end
|
36
41
|
|
37
42
|
it "will not confuse column information with another csv which is parsed simultaneously" do
|
38
|
-
|
43
|
+
table = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
|
39
44
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
40
|
-
end
|
41
|
-
|
45
|
+
end.read
|
46
|
+
table2 = CSV::AutoParser.new(fixture_file_path('persons2.csv')) do |line_num, header_row|
|
47
|
+
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
48
|
+
end.read
|
49
|
+
table.length.must_equal 3
|
50
|
+
table.first.name.must_equal "Jon Smith"
|
51
|
+
table.first.job_title.must_equal "blacksmith"
|
52
|
+
table[-1].age.to_i.must_equal 29
|
53
|
+
table[1].job_title.must_equal "farmer"
|
54
|
+
|
55
|
+
table2.length.must_equal 2
|
56
|
+
table2.first.name.must_equal "Kermy Frog"
|
57
|
+
table2.first.job_title.must_equal "frog"
|
58
|
+
table2[-1].age.to_i.must_equal 19
|
59
|
+
end
|
60
|
+
|
61
|
+
it "will define methods which return nil for optional columns not present in CSV" do
|
62
|
+
table1 = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
|
63
|
+
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
64
|
+
end.read
|
65
|
+
lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
|
66
|
+
table2 = CSV::AutoParser.new(fixture_file_path('persons.csv'), optional_headers: [:my_optional_header, "Zip Code", :name]) do |line_num, header_row|
|
42
67
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
68
|
+
end.read
|
69
|
+
table2.first.my_optional_header.must_be_nil
|
70
|
+
lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
|
71
|
+
table2.first.name.must_equal "Jon Smith"
|
72
|
+
lambda { table2.first.my_mandatory_header }.must_raise(NoMethodError)
|
73
|
+
table2.first.zip_code.must_be_nil
|
74
|
+
table2[-1].my_optional_header.must_be_nil
|
75
|
+
end
|
76
|
+
|
77
|
+
it "will pass along CSV.new options" do
|
78
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|l, hr| l == 1 }
|
79
|
+
parser.field_size_limit.must_be_nil
|
80
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), field_size_limit: 100) {|l, hr| l == 1 }
|
81
|
+
parser.field_size_limit.must_equal 100
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should work with a CSV string or an IO object too" do
|
85
|
+
input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
|
86
|
+
input_objects.each do |obj|
|
87
|
+
table = CSV::AutoParser.new(obj) do |line_num, header_row|
|
88
|
+
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
89
|
+
end.read
|
90
|
+
table.length.must_equal 3
|
91
|
+
table.first.name.must_equal "Jon Smith"
|
92
|
+
table.first.job_title.must_equal "blacksmith"
|
93
|
+
table[-1].age.to_i.must_equal 29
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should work just like CSV.new when not passed a block except that it can now take a file path as data too" do
|
98
|
+
input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
|
99
|
+
input_objects.each do |obj|
|
100
|
+
parser = CSV::AutoParser.new(obj, header_converters: :symbol, headers: :first_row)
|
101
|
+
parser.header_line_number.must_equal 1
|
102
|
+
parser.pre_header_rows.must_be_empty
|
103
|
+
table = parser.read
|
104
|
+
table.length.must_equal 5
|
105
|
+
table.first[:fullname].must_equal "bob"
|
106
|
+
# method names are based off of converted header names!
|
107
|
+
table.first.fullname.must_equal "bob"
|
43
108
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
csv2.rows.length.must_equal 2
|
52
|
-
csv2.rows.first.name.must_equal "Kermy Frog"
|
53
|
-
csv2.rows.first.job_title.must_equal "frog"
|
54
|
-
csv2.rows.last.age.to_i.must_equal 19
|
55
|
-
File.basename(csv2.rows.first.csv_file).must_equal "persons2.csv"
|
109
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol, headers: "first_col,second_col,third_col,fourth_col")
|
110
|
+
parser.header_line_number.must_equal nil
|
111
|
+
parser.pre_header_rows.must_be_empty
|
112
|
+
table = parser.read
|
113
|
+
table.length.must_equal 6
|
114
|
+
table[0].first_col.must_equal "full-name"
|
115
|
+
table[1].first_col.must_equal "bob"
|
56
116
|
end
|
57
117
|
|
58
118
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-autoparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Delsol
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -49,6 +49,7 @@ files:
|
|
49
49
|
- README.md
|
50
50
|
- Rakefile
|
51
51
|
- csv-autoparser.gemspec
|
52
|
+
- examples/basic.rb
|
52
53
|
- lib/csv/autoparser.rb
|
53
54
|
- lib/csv/autoparser/version.rb
|
54
55
|
- test/fixtures/persons.csv
|