csv-autoparser 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +37 -13
- data/examples/basic.rb +72 -0
- data/lib/csv/autoparser.rb +75 -23
- data/lib/csv/autoparser/version.rb +2 -2
- data/test/test_csv/autoparser.rb +89 -29
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 202d8f86d22651aeecb94ab9b5a839c1df8d0117
|
4
|
+
data.tar.gz: 06df0597fc5d4bd6df4f9f7da5ded7bdda5542c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 409f3c4d1505698a2e248309d061a0c77c98c965081361a058756a215eb5eb3601b5385604065a231482dde5b55af48e4cf6b58d9a4200926c42c1ced7521a90
|
7
|
+
data.tar.gz: 9fc72adf4b37cd1b86fbe62e0f47e73731aa7c70963192bc4e6cf4f5afef6feeb71c9673a98c4f0a20a2880d376459f24abaf5a7f6239a9a99a36c310b0924f1
|
data/README.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
# CSV::AutoParser
|
2
2
|
|
3
|
-
CSV::AutoParser automatically parses a CSV file given a user specified header row
|
3
|
+
CSV::AutoParser automatically parses a CSV file given a user specified header row and
|
4
|
+
adds method style accessors to the CSV::Row data.
|
5
|
+
|
6
|
+
All functionality of the standard Ruby CSV class is accessible since CSV::AutoParser
|
7
|
+
inherits from CSV. CSV::AutoParser objects behave just like CSV objects when not
|
8
|
+
provided a block (though method style accessors will still be available on CSV::Row
|
9
|
+
objects).
|
4
10
|
|
5
11
|
## Installation
|
6
12
|
|
@@ -8,18 +14,36 @@ CSV::AutoParser automatically parses a CSV file given a user specified header ro
|
|
8
14
|
|
9
15
|
## Usage
|
10
16
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
```ruby
|
18
|
+
require 'csv/autoparser'
|
19
|
+
|
20
|
+
data = <<CSV
|
21
|
+
"this is",not,"the header"
|
22
|
+
"the real header","is easy","to find"
|
23
|
+
name,"Job title",age
|
24
|
+
"Jon Smith",blacksmith,55
|
25
|
+
"Jimmy Johnson",farmer,34
|
26
|
+
"Kimmy Kimmson","pig wrangler",29
|
27
|
+
CSV
|
28
|
+
|
29
|
+
# ID header row by CSV line number.
|
30
|
+
csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
|
31
|
+
csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
|
32
|
+
# Jon Smith is a 55 year old blacksmith.
|
33
|
+
# Jimmy Johnson is a 34 year old farmer.
|
34
|
+
# Kimmy Kimmson is a 29 year old pig wrangler.
|
35
|
+
|
36
|
+
# -OR- ID header row by column header names.
|
37
|
+
csv = CSV::AutoParser.new(data) do |line_num, header_row|
|
38
|
+
["name", "Job title"].all? {|field| header_row.include?(field) }
|
39
|
+
end
|
40
|
+
csv.is_a?(CSV) # => true
|
41
|
+
table = csv.read # => CSV::Table
|
42
|
+
table.first.name # => "Jon Smith"
|
43
|
+
table[-1].job_title # => "pig wrangler"
|
44
|
+
```
|
45
|
+
|
46
|
+
More usage examples can be seen in examples/.
|
23
47
|
|
24
48
|
## Contributing
|
25
49
|
|
data/examples/basic.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
3
|
+
|
4
|
+
require 'csv/autoparser'
|
5
|
+
|
6
|
+
data = <<CSV
|
7
|
+
"this is",not,"the header"
|
8
|
+
"the real header","is easy","to find"
|
9
|
+
name,"Job title",age
|
10
|
+
"Jon Smith",blacksmith,55
|
11
|
+
"Jimmy Johnson",farmer,34
|
12
|
+
"Kimmy Kimmson","pig wrangler",29
|
13
|
+
CSV
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Identifying the Header Row
|
17
|
+
########################################
|
18
|
+
|
19
|
+
# ID header row by CSV line number.
|
20
|
+
csv = CSV::AutoParser.new(data) {|line_number, header_row| line_number == 3 }
|
21
|
+
csv.each {|row| puts "#{row.name} is a #{row.age} year old #{row.job_title}." }
|
22
|
+
# Jon Smith is a 55 year old blacksmith.
|
23
|
+
# Jimmy Johnson is a 34 year old farmer.
|
24
|
+
# Kimmy Kimmson is a 29 year old pig wrangler.
|
25
|
+
|
26
|
+
# -OR- ID header row by column header names.
|
27
|
+
csv = CSV::AutoParser.new(data) do |line_num, header_row|
|
28
|
+
["name", "Job title"].all? {|field| header_row.include?(field) }
|
29
|
+
end
|
30
|
+
csv.is_a?(CSV) # => true
|
31
|
+
table = csv.read # => CSV::Table
|
32
|
+
table.first.name # => "Jon Smith"
|
33
|
+
table[-1].job_title # => "pig wrangler"
|
34
|
+
|
35
|
+
|
36
|
+
################################################
|
37
|
+
# Want to get at the data above the header row?
|
38
|
+
################################################
|
39
|
+
|
40
|
+
csv = CSV::AutoParser.new(File.expand_path("../../test/fixtures/persons.csv", __FILE__)) {|lineno, hr| lineno == 3 }
|
41
|
+
csv.pre_header_rows.each do |row|
|
42
|
+
puts "#{row.file}(#{row.line}): #{row.inspect}"
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
########################################
|
47
|
+
# Optional Header Columns
|
48
|
+
########################################
|
49
|
+
|
50
|
+
data2 = <<CSV
|
51
|
+
name,"city name","Job title"
|
52
|
+
"Jon Smith",Sacramento,blacksmith
|
53
|
+
"Jimmy Johnson","San Diego",farmer
|
54
|
+
"Kimmy Kimmson",Austin,"pig wrangler"
|
55
|
+
CSV
|
56
|
+
|
57
|
+
# When CSV columns are optional, specify it through options so that a NoMethodError is not raised.
|
58
|
+
def demo_optional_header_columns data
|
59
|
+
csv = CSV::AutoParser.new(data, optional_headers: ["City Name", :age]) {|lineno, hr| hr.include? "name" }
|
60
|
+
csv.each do |row|
|
61
|
+
# row.birth_date # => raises NoMethodError
|
62
|
+
if row.city_name
|
63
|
+
puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title} living in #{row.city_name}."
|
64
|
+
else
|
65
|
+
puts "#{row.name} is a #{row.age || "?"} year old #{row.job_title}."
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
demo_optional_header_columns(data)
|
71
|
+
demo_optional_header_columns(data2)
|
72
|
+
|
data/lib/csv/autoparser.rb
CHANGED
@@ -2,46 +2,98 @@ require "csv"
|
|
2
2
|
require "csv/autoparser/version"
|
3
3
|
|
4
4
|
class CSV
|
5
|
-
class AutoParser
|
6
5
|
|
7
|
-
|
8
|
-
|
6
|
+
class Row
|
7
|
+
alias_method :orig_initialize, :initialize
|
8
|
+
# Defines method style accessors based on header row names.
|
9
|
+
def initialize(*args)
|
10
|
+
orig_initialize(*args)
|
11
|
+
if field_row?
|
12
|
+
headers.each do |h|
|
13
|
+
define_singleton_method(CSV::AutoParser.convert_header_to_method_name(h)) { fetch(h) }
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class AutoParser < CSV
|
20
|
+
|
21
|
+
# This is the method called by AutoParser to turn header names into legal method names.
|
22
|
+
# Redefine as necessary.
|
23
|
+
def self.convert_header_to_method_name header
|
24
|
+
header.to_s.downcase.strip.gsub(/\s+/, '_').gsub(/-+/, '_').gsub(/[^\w]/, '').to_sym
|
25
|
+
end
|
26
|
+
|
27
|
+
# The rows found before the header row are paired with file and line information. These
|
28
|
+
# objects are available through CSV::AutoParser#pre_header_rows.
|
29
|
+
class PreHeaderRow < Array
|
30
|
+
attr_reader :file, :line
|
9
31
|
def self.create original_row, file, line
|
10
|
-
row =
|
11
|
-
row.instance_eval { @
|
32
|
+
row = PreHeaderRow.new(original_row)
|
33
|
+
row.instance_eval { @file = file; @line = line }
|
12
34
|
return row
|
13
35
|
end
|
14
36
|
end
|
15
37
|
|
16
38
|
class HeaderRowNotFound < RuntimeError; end
|
17
39
|
|
18
|
-
attr_reader :pre_header_rows, :
|
40
|
+
attr_reader :pre_header_rows, :header_line_number
|
19
41
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
42
|
+
# +data+ can be the path of a CSV file, in addition to a CSV String or an IO object as accepted by CSV.new.
|
43
|
+
# All CSV.new options are supported via +opts+. If an +&is_header+ block is provided, it
|
44
|
+
# takes precedence over the CSV.new +:headers+ option. A +:optional_headers+ option has
|
45
|
+
# been added for specifying headers that may not be present in the CSV, but you do not want
|
46
|
+
# a NoMethodError to be raised when accessing a field using the header method style accessor.
|
47
|
+
def initialize data, opts={}, &is_header
|
48
|
+
@header_line_number = nil
|
24
49
|
@pre_header_rows = []
|
25
|
-
|
26
|
-
|
27
|
-
|
50
|
+
@optional_headers = [opts.delete(:optional_headers)].flatten.compact
|
51
|
+
if data.is_a?(String) and File.exists?(data)
|
52
|
+
file = data
|
53
|
+
data = File.open(data)
|
54
|
+
end
|
55
|
+
if block_given?
|
56
|
+
data_io = if data.is_a?(IO)
|
57
|
+
data
|
58
|
+
elsif data.is_a?(String)
|
59
|
+
StringIO.new(data)
|
60
|
+
else
|
61
|
+
raise ArgumentError, "data must be a path to a CSV file, a CSV formatted String, or an IO object."
|
62
|
+
end
|
63
|
+
header_pos = data_io.pos
|
64
|
+
csv_line_number = 0
|
65
|
+
header_finder = CSV.new(data_io, opts.merge(:headers => false)).each do |row|
|
66
|
+
csv_line_number += 1
|
28
67
|
if is_header.call(csv_line_number, row)
|
29
|
-
|
68
|
+
@header_line_number = csv_line_number
|
69
|
+
break
|
30
70
|
else
|
31
|
-
@pre_header_rows <<
|
32
|
-
end
|
33
|
-
else
|
34
|
-
@rows << Row::create(row, file, csv_line_number)
|
35
|
-
map.each_pair do |column_name, column_offset|
|
36
|
-
@rows.last.define_singleton_method(column_to_method_name(column_name)) { self[column_offset] }
|
71
|
+
@pre_header_rows << CSV::AutoParser::PreHeaderRow.create(row, file, csv_line_number)
|
37
72
|
end
|
73
|
+
header_pos = data_io.pos
|
38
74
|
end
|
75
|
+
raise HeaderRowNotFound, "Could not find header row#{file ? " in #{file}" : "" }." if @header_line_number.nil?
|
76
|
+
data_io.seek header_pos
|
77
|
+
data_io = StringIO.new(data_io.read)
|
78
|
+
super(data_io, opts.merge(:headers => true))
|
79
|
+
else
|
80
|
+
@header_line_number = 1 if opts[:headers] == :first_row or opts[:headers] == true
|
81
|
+
super(data, opts)
|
39
82
|
end
|
40
|
-
raise HeaderRowNotFound, "Could not find header row in #{file}." if map.empty?
|
41
83
|
end
|
42
84
|
|
43
|
-
|
44
|
-
|
85
|
+
alias_method :orig_shift, :shift
|
86
|
+
|
87
|
+
# Overridden to add methods for optional headers which were not present in the CSV.
|
88
|
+
def shift
|
89
|
+
row = orig_shift
|
90
|
+
[@optional_headers].flatten.compact.each do |h|
|
91
|
+
method_name = self.class.convert_header_to_method_name(h)
|
92
|
+
unless row.respond_to? method_name
|
93
|
+
row.define_singleton_method(method_name) {nil}
|
94
|
+
end
|
95
|
+
end
|
96
|
+
return row
|
45
97
|
end
|
46
98
|
|
47
99
|
end
|
data/test/test_csv/autoparser.rb
CHANGED
@@ -4,29 +4,34 @@ require 'minitest_helper'
|
|
4
4
|
describe CSV::AutoParser do
|
5
5
|
|
6
6
|
it "it can parse a csv automatically via csv line number id" do
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
table = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|csv_line_number, header_row| csv_line_number == 1 }.read
|
8
|
+
table.class.must_equal CSV::Table
|
9
|
+
table.length.must_equal 5
|
10
|
+
table.first["full-name"].must_equal "bob"
|
11
|
+
table.first.full_name.must_equal "bob"
|
10
12
|
end
|
11
13
|
|
12
14
|
it "it can parse a csv automatically via header row id" do
|
13
|
-
|
15
|
+
table = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol) do |line_num, header_row|
|
14
16
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
end.read
|
18
|
+
table.length.must_equal 3
|
19
|
+
table.first.name.must_equal "Jon Smith"
|
20
|
+
table.first["Job title"].must_be_nil
|
21
|
+
table.first[:job_title].must_equal "blacksmith"
|
22
|
+
table.first.job_title.must_equal "blacksmith"
|
23
|
+
table[-1].age.to_i.must_equal 29
|
20
24
|
end
|
21
25
|
|
22
26
|
it "it will give you the rows found before the header row" do
|
23
|
-
|
27
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
|
24
28
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
25
29
|
end
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
+
parser.header_line_number.must_equal 3
|
31
|
+
parser.pre_header_rows.first.last.must_equal "years of age"
|
32
|
+
parser.pre_header_rows.last.first.must_equal "bob"
|
33
|
+
File.basename(parser.pre_header_rows.last.file).must_equal "persons.csv"
|
34
|
+
parser.pre_header_rows.last.line.must_equal 2
|
30
35
|
end
|
31
36
|
|
32
37
|
it "will raise an exception if it can't find the header row" do
|
@@ -35,24 +40,79 @@ describe CSV::AutoParser do
|
|
35
40
|
end
|
36
41
|
|
37
42
|
it "will not confuse column information with another csv which is parsed simultaneously" do
|
38
|
-
|
43
|
+
table = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
|
39
44
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
40
|
-
end
|
41
|
-
|
45
|
+
end.read
|
46
|
+
table2 = CSV::AutoParser.new(fixture_file_path('persons2.csv')) do |line_num, header_row|
|
47
|
+
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
48
|
+
end.read
|
49
|
+
table.length.must_equal 3
|
50
|
+
table.first.name.must_equal "Jon Smith"
|
51
|
+
table.first.job_title.must_equal "blacksmith"
|
52
|
+
table[-1].age.to_i.must_equal 29
|
53
|
+
table[1].job_title.must_equal "farmer"
|
54
|
+
|
55
|
+
table2.length.must_equal 2
|
56
|
+
table2.first.name.must_equal "Kermy Frog"
|
57
|
+
table2.first.job_title.must_equal "frog"
|
58
|
+
table2[-1].age.to_i.must_equal 19
|
59
|
+
end
|
60
|
+
|
61
|
+
it "will define methods which return nil for optional columns not present in CSV" do
|
62
|
+
table1 = CSV::AutoParser.new(fixture_file_path('persons.csv')) do |line_num, header_row|
|
63
|
+
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
64
|
+
end.read
|
65
|
+
lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
|
66
|
+
table2 = CSV::AutoParser.new(fixture_file_path('persons.csv'), optional_headers: [:my_optional_header, "Zip Code", :name]) do |line_num, header_row|
|
42
67
|
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
68
|
+
end.read
|
69
|
+
table2.first.my_optional_header.must_be_nil
|
70
|
+
lambda { table1.first.my_optional_header }.must_raise(NoMethodError)
|
71
|
+
table2.first.name.must_equal "Jon Smith"
|
72
|
+
lambda { table2.first.my_mandatory_header }.must_raise(NoMethodError)
|
73
|
+
table2.first.zip_code.must_be_nil
|
74
|
+
table2[-1].my_optional_header.must_be_nil
|
75
|
+
end
|
76
|
+
|
77
|
+
it "will pass along CSV.new options" do
|
78
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv')) {|l, hr| l == 1 }
|
79
|
+
parser.field_size_limit.must_be_nil
|
80
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), field_size_limit: 100) {|l, hr| l == 1 }
|
81
|
+
parser.field_size_limit.must_equal 100
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should work with a CSV string or an IO object too" do
|
85
|
+
input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
|
86
|
+
input_objects.each do |obj|
|
87
|
+
table = CSV::AutoParser.new(obj) do |line_num, header_row|
|
88
|
+
["name", "Job title"].all? {|cell| header_row.include?(cell) }
|
89
|
+
end.read
|
90
|
+
table.length.must_equal 3
|
91
|
+
table.first.name.must_equal "Jon Smith"
|
92
|
+
table.first.job_title.must_equal "blacksmith"
|
93
|
+
table[-1].age.to_i.must_equal 29
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should work just like CSV.new when not passed a block except that it can now take a file path as data too" do
|
98
|
+
input_objects = [fixture_file_path('persons.csv'), File.open(fixture_file_path('persons.csv')), File.open(fixture_file_path('persons.csv'))]
|
99
|
+
input_objects.each do |obj|
|
100
|
+
parser = CSV::AutoParser.new(obj, header_converters: :symbol, headers: :first_row)
|
101
|
+
parser.header_line_number.must_equal 1
|
102
|
+
parser.pre_header_rows.must_be_empty
|
103
|
+
table = parser.read
|
104
|
+
table.length.must_equal 5
|
105
|
+
table.first[:fullname].must_equal "bob"
|
106
|
+
# method names are based off of converted header names!
|
107
|
+
table.first.fullname.must_equal "bob"
|
43
108
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
csv2.rows.length.must_equal 2
|
52
|
-
csv2.rows.first.name.must_equal "Kermy Frog"
|
53
|
-
csv2.rows.first.job_title.must_equal "frog"
|
54
|
-
csv2.rows.last.age.to_i.must_equal 19
|
55
|
-
File.basename(csv2.rows.first.csv_file).must_equal "persons2.csv"
|
109
|
+
parser = CSV::AutoParser.new(fixture_file_path('persons.csv'), header_converters: :symbol, headers: "first_col,second_col,third_col,fourth_col")
|
110
|
+
parser.header_line_number.must_equal nil
|
111
|
+
parser.pre_header_rows.must_be_empty
|
112
|
+
table = parser.read
|
113
|
+
table.length.must_equal 6
|
114
|
+
table[0].first_col.must_equal "full-name"
|
115
|
+
table[1].first_col.must_equal "bob"
|
56
116
|
end
|
57
117
|
|
58
118
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-autoparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Delsol
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -49,6 +49,7 @@ files:
|
|
49
49
|
- README.md
|
50
50
|
- Rakefile
|
51
51
|
- csv-autoparser.gemspec
|
52
|
+
- examples/basic.rb
|
52
53
|
- lib/csv/autoparser.rb
|
53
54
|
- lib/csv/autoparser/version.rb
|
54
55
|
- test/fixtures/persons.csv
|