honey_format 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +47 -11
- data/lib/honey_format/csv.rb +15 -9
- data/lib/honey_format/header.rb +60 -13
- data/lib/honey_format/row.rb +8 -16
- data/lib/honey_format/row_builder.rb +2 -0
- data/lib/honey_format/rows.rb +3 -3
- data/lib/honey_format/version.rb +1 -1
- metadata +2 -4
- data/lib/honey_format/columns.rb +0 -50
- data/lib/honey_format/sanitize.rb +0 -46
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c3b35ed8d8cac5eace438a1eb5365b4970d96e2fee1a3faece4555eae44f1a3
|
4
|
+
data.tar.gz: 01ed3a3b5bc973b5882c6ec2f61c647bf24d0edadd76c7155568e2a6c2cbcbbb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af026b70d5fa369633788e5be0796723161b8564aca45084e704fa0691ad7da1fbe1a4578262dec60806d71fe1757984630c8d3e7be1fc4df276b0c3bf3cac1f
|
7
|
+
data.tar.gz: 267b348cff25cffb1204e26ec4f34f60fafb4154b44afa6005599bfd71ab8946024be7fd56deca52ff5c5ee31f53fe4f4ab79fd25ead5df0ad79ece91cfdeb5d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# v0.7.0
|
2
|
+
|
3
|
+
- Don't sanitize each row :rocket: (improves performance from ~1.4x slower than raw CSV to ~1.1x slower)
|
4
|
+
- Fold `Columns` class into `Header`
|
5
|
+
- Remove `Sanitize` class
|
6
|
+
|
1
7
|
# v0.6.0
|
2
8
|
|
3
9
|
* Add `CSV#to_csv` ([PR#2](https://github.com/buren/honey_format/pull/2))
|
data/README.md
CHANGED
@@ -49,11 +49,30 @@ user.id # => "1"
|
|
49
49
|
user.username # => "buren"
|
50
50
|
```
|
51
51
|
|
52
|
-
|
52
|
+
Minimal custom row builder
|
53
53
|
```ruby
|
54
54
|
csv_string = "Id, Username\n 1, buren"
|
55
|
-
|
56
|
-
csv = HoneyFormat::CSV.new(csv_string, row_builder:
|
55
|
+
upcaser = ->(row) { row.username.upcase!; row }
|
56
|
+
csv = HoneyFormat::CSV.new(csv_string, row_builder: upcaser)
|
57
|
+
csv.rows # => [#<struct id="1", username="BUREN">]
|
58
|
+
```
|
59
|
+
|
60
|
+
Complete custom row builder
|
61
|
+
```ruby
|
62
|
+
class Anonymizer
|
63
|
+
def self.call(row)
|
64
|
+
# Return an object you want to represent the row
|
65
|
+
row.tap do |r|
|
66
|
+
r.name = '<anon>'
|
67
|
+
r.email = '<anon>'
|
68
|
+
r.ssn = '<anon>'
|
69
|
+
r.payment_id = '<scrubbed>'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
csv_string = "Id, Username\n 1, buren"
|
75
|
+
csv = HoneyFormat::CSV.new(csv_string, row_builder: Anonymizer)
|
57
76
|
csv.rows # => [#<struct id="1", username="BUREN">]
|
58
77
|
```
|
59
78
|
|
@@ -65,6 +84,11 @@ csv.rows.each { |row| row.id = nil }
|
|
65
84
|
csv.to_csv # => "Id, Username\n,buren\n"
|
66
85
|
```
|
67
86
|
|
87
|
+
You can of course set the delimiter
|
88
|
+
```ruby
|
89
|
+
HoneyFormat::CSV.new(csv_string, delimiter: ';')
|
90
|
+
```
|
91
|
+
|
68
92
|
Validate CSV header
|
69
93
|
```ruby
|
70
94
|
csv_string = "Id, Username\n 1, buren"
|
@@ -85,17 +109,29 @@ csv.rows.first.username # => "buren"
|
|
85
109
|
```
|
86
110
|
|
87
111
|
If your header contains special chars and/or chars that can't be part of Ruby method names,
|
88
|
-
things get a little awkward..
|
112
|
+
things can get a little awkward..
|
89
113
|
```ruby
|
90
114
|
csv_string = "ÅÄÖ\nSwedish characters"
|
91
115
|
user = HoneyFormat::CSV.new(csv_string).rows.first
|
92
|
-
# Note that these chars aren't "downcased",
|
116
|
+
# Note that these chars aren't "downcased" in Ruby 2.3 and older versions of Ruby,
|
93
117
|
# "ÅÄÖ".downcase # => "ÅÄÖ"
|
94
118
|
user.ÅÄÖ # => "Swedish characters"
|
119
|
+
# while on Ruby > 2.3
|
120
|
+
user.åäö
|
95
121
|
|
96
|
-
csv_string = "First
|
122
|
+
csv_string = "First^Name\nJacob"
|
97
123
|
user = HoneyFormat::CSV.new(csv_string).rows.first
|
98
|
-
user.public_send(:"first
|
124
|
+
user.public_send(:"first^name") # => "Jacob"
|
125
|
+
```
|
126
|
+
|
127
|
+
Pass your own header converter
|
128
|
+
```ruby
|
129
|
+
map = { 'First^Name' => :first_name }
|
130
|
+
converter = ->(column) { map.fetch(column, column) }
|
131
|
+
|
132
|
+
csv_string = "First^Name\nJacob"
|
133
|
+
user = HoneyFormat::CSV.new(csv_string, header_converter: converter).rows.first
|
134
|
+
user.first_name # => "Jacob"
|
99
135
|
```
|
100
136
|
|
101
137
|
If you want to see more usage examples check out the `spec/` directory.
|
@@ -113,15 +149,15 @@ $ bin/benchmark file.csv
|
|
113
149
|
204KB (1k lines)
|
114
150
|
|
115
151
|
```
|
116
|
-
stdlib CSV:
|
117
|
-
HoneyFormat::CSV:
|
152
|
+
stdlib CSV: 51.9 i/s
|
153
|
+
HoneyFormat::CSV: 49.6 i/s - 1.05x slower
|
118
154
|
```
|
119
155
|
|
120
156
|
19MB (100k lines)
|
121
157
|
|
122
158
|
```
|
123
|
-
stdlib CSV:
|
124
|
-
HoneyFormat::CSV:
|
159
|
+
stdlib CSV: 0.4 i/s
|
160
|
+
HoneyFormat::CSV: 0.4 i/s - 1.11x slower
|
125
161
|
```
|
126
162
|
|
127
163
|
## Development
|
data/lib/honey_format/csv.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'csv'
|
2
2
|
|
3
|
-
require 'honey_format/sanitize'
|
4
3
|
require 'honey_format/rows'
|
5
4
|
require 'honey_format/header'
|
6
5
|
|
@@ -9,27 +8,29 @@ module HoneyFormat
|
|
9
8
|
class CSV
|
10
9
|
# @return [CSV] a new instance of CSV.
|
11
10
|
# @param [String] csv string.
|
12
|
-
# @param [Array] valid_columns valid array of symbols representing valid columns.
|
13
|
-
# @param [Array] header optional argument for CSV header
|
11
|
+
# @param [Array<Symbol>] valid_columns array of symbols representing valid columns; if empty, all columns are considered valid.
|
12
|
+
# @param [Array<String>] header optional argument for CSV header
|
14
13
|
# @param [#call] row_builder will be called for each parsed row
|
15
14
|
# @raise [MissingCSVHeaderError] raised when header is missing (empty or nil).
|
16
15
|
# @raise [MissingCSVHeaderColumnError] raised when header column is missing.
|
17
16
|
# @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
|
18
|
-
def initialize(csv, delimiter: ',', header: nil, valid_columns:
|
17
|
+
def initialize(csv, delimiter: ',', header: nil, valid_columns: [], header_converter: ConvertHeaderValue, row_builder: nil)
|
19
18
|
csv = ::CSV.parse(csv, col_sep: delimiter)
|
20
19
|
header_row = header || csv.shift
|
21
20
|
@header = Header.new(header_row, valid: valid_columns, converter: header_converter)
|
22
21
|
@rows = Rows.new(csv, columns, builder: row_builder)
|
23
22
|
end
|
24
23
|
|
25
|
-
#
|
24
|
+
# Original CSV header
|
25
|
+
# @return [Array<String>] of strings for sanitized header.
|
26
26
|
def header
|
27
|
-
@header.
|
27
|
+
@header.original
|
28
28
|
end
|
29
29
|
|
30
|
-
#
|
30
|
+
# CSV columns
|
31
|
+
# @return [Array<Symbol>] of column identifiers.
|
31
32
|
def columns
|
32
|
-
@header.
|
33
|
+
@header.to_a
|
33
34
|
end
|
34
35
|
|
35
36
|
# @return [Array] of rows.
|
@@ -38,11 +39,16 @@ module HoneyFormat
|
|
38
39
|
@rows
|
39
40
|
end
|
40
41
|
|
41
|
-
# @yield [row] block
|
42
|
+
# @yield [row] The given block will be passed for every row.
|
43
|
+
# @yieldparam [Row] a row in the CSV file.
|
44
|
+
# @return [Enumerator] If no block is given, an enumerator object will be returned.
|
42
45
|
def each_row
|
46
|
+
return rows.each unless block_given?
|
47
|
+
|
43
48
|
rows.each { |row| yield(row) }
|
44
49
|
end
|
45
50
|
|
51
|
+
# Convert the CSV object to a CSV-string.
|
46
52
|
# @return [String] CSV-string representation.
|
47
53
|
def to_csv
|
48
54
|
header.to_csv + @rows.to_csv
|
data/lib/honey_format/header.rb
CHANGED
@@ -1,32 +1,79 @@
|
|
1
|
-
require 'honey_format/
|
1
|
+
require 'honey_format/convert_header_value'
|
2
2
|
|
3
3
|
module HoneyFormat
|
4
4
|
# Represents a header
|
5
5
|
class Header
|
6
|
-
|
6
|
+
include Enumerable
|
7
7
|
|
8
|
+
# Instantiate a Header
|
8
9
|
# @return [Header] a new instance of Header.
|
9
|
-
# @param [Array] header array of strings.
|
10
|
-
# @param [Array] valid array of symbols representing valid columns.
|
11
|
-
# @
|
12
|
-
|
10
|
+
# @param [Array<String>] header array of strings.
|
11
|
+
# @param [Array<Symbol>] valid array of symbols representing valid columns; if empty, all columns are considered valid.
|
12
|
+
# @param converter [#call] header converter that implements a #call method that takes one column (string) argument.
|
13
|
+
# @raise [MissingCSVHeaderColumnError] raised when header is missing
|
14
|
+
# @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
|
15
|
+
# @example Instantiate a header with a custom converter
|
16
|
+
# converter = ->(col) { col == 'username' ? 'handle' : col }
|
17
|
+
# header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
|
18
|
+
# header.to_a # => ['name', 'handle']
|
19
|
+
def initialize(header, valid: [], converter: ConvertHeaderValue)
|
13
20
|
if header.nil? || header.empty?
|
14
|
-
|
21
|
+
raise(MissingCSVHeaderError, "CSV header can't be empty.")
|
15
22
|
end
|
16
23
|
|
17
|
-
@
|
18
|
-
@
|
24
|
+
@original_header = header.map { |col| col ? col.strip : nil }
|
25
|
+
@converter = converter
|
26
|
+
@columns = build_columns(@original_header, valid)
|
27
|
+
end
|
28
|
+
|
29
|
+
# @return [Array<String>] the original header
|
30
|
+
def original
|
31
|
+
@original_header
|
32
|
+
end
|
33
|
+
|
34
|
+
# @yield [row] The given block will be passed for every column.
|
35
|
+
# @yieldparam [Row] a column in the CSV header.
|
36
|
+
# @return [Enumerator]
|
37
|
+
# If no block is given, an enumerator object will be returned.
|
38
|
+
def each(&block)
|
39
|
+
@columns.each(&block)
|
19
40
|
end
|
20
41
|
|
21
42
|
# Returns columns as array.
|
22
|
-
# @return [Array] of columns.
|
23
|
-
def
|
24
|
-
@columns
|
43
|
+
# @return [Array<Symbol>] of columns.
|
44
|
+
def to_a
|
45
|
+
@columns
|
25
46
|
end
|
26
47
|
|
48
|
+
# Header as CSV-string
|
27
49
|
# @return [String] CSV-string representation.
|
28
50
|
def to_csv
|
29
|
-
columns.to_csv
|
51
|
+
@columns.to_csv
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Convert original header
|
57
|
+
# @param [Array<String>] header the original header
|
58
|
+
# @param [Array<Symbol>] valid list of valid column names; if empty, all are considered valid.
|
59
|
+
# @return [Array<String>] converted columns
|
60
|
+
def build_columns(header, valid)
|
61
|
+
valid = valid.map(&:to_sym)
|
62
|
+
|
63
|
+
header.map do |column|
|
64
|
+
column = @converter.call(column.dup)
|
65
|
+
|
66
|
+
if column.nil? || column.empty?
|
67
|
+
raise(MissingCSVHeaderColumnError, "CSV header column can't be empty.")
|
68
|
+
end
|
69
|
+
|
70
|
+
if valid.any? && !valid.include?(column)
|
71
|
+
err_msg = "column :#{column} not in #{valid.inspect}"
|
72
|
+
raise(UnknownCSVHeaderColumnError, err_msg)
|
73
|
+
end
|
74
|
+
|
75
|
+
column
|
76
|
+
end
|
30
77
|
end
|
31
78
|
end
|
32
79
|
end
|
data/lib/honey_format/row.rb
CHANGED
@@ -11,9 +11,13 @@ module HoneyFormat
|
|
11
11
|
# @example Create new row
|
12
12
|
# Row.new!([:id])
|
13
13
|
def initialize(columns, builder: nil)
|
14
|
-
|
14
|
+
if columns.empty?
|
15
|
+
err_msg = 'Expected array with at least one element, but was empty.'
|
16
|
+
raise(EmptyColumnsError, err_msg)
|
17
|
+
end
|
18
|
+
|
15
19
|
@row_builder = RowBuilder.new(*columns)
|
16
|
-
@builder = builder
|
20
|
+
@builder = builder
|
17
21
|
@columns = columns
|
18
22
|
end
|
19
23
|
|
@@ -26,27 +30,15 @@ module HoneyFormat
|
|
26
30
|
# r.build(['1']).id #=> '1'
|
27
31
|
def build(row)
|
28
32
|
built_row = @row_builder.call(row)
|
33
|
+
return built_row unless @builder
|
29
34
|
@builder.call(built_row)
|
30
35
|
rescue ArgumentError, 'struct size differs'
|
31
|
-
fail_for_struct_size_diff!(row)
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def validate_columns!(columns)
|
37
|
-
return unless columns.empty?
|
38
|
-
|
39
|
-
err_msg = 'Expected array with at least one element, but was empty.'
|
40
|
-
fail(EmptyColumnsError, err_msg)
|
41
|
-
end
|
42
|
-
|
43
|
-
def fail_for_struct_size_diff!(row)
|
44
36
|
err_msg = [
|
45
37
|
"Row length #{row.length}",
|
46
38
|
"for columns #{@columns.length}",
|
47
39
|
"row: #{row.inspect}"
|
48
40
|
].join(', ')
|
49
|
-
|
41
|
+
raise(InvalidRowLengthError, err_msg)
|
50
42
|
end
|
51
43
|
end
|
52
44
|
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
module HoneyFormat
|
2
2
|
# Default row builder
|
3
3
|
class RowBuilder < Struct
|
4
|
+
# Create a row
|
4
5
|
# @return [Struct] returns an instantiated Struct representing a row
|
5
6
|
def self.call(row)
|
6
7
|
new(*row)
|
7
8
|
end
|
8
9
|
|
10
|
+
# Represent row as CSV
|
9
11
|
# @return [String] CSV-string representation.
|
10
12
|
def to_csv
|
11
13
|
members.map do |column_name|
|
data/lib/honey_format/rows.rb
CHANGED
@@ -13,7 +13,7 @@ module HoneyFormat
|
|
13
13
|
@rows = prepare_rows(Row.new(columns, builder: builder), rows)
|
14
14
|
end
|
15
15
|
|
16
|
-
# @yield [row] The given block will be passed every row.
|
16
|
+
# @yield [row] The given block will be passed for every row.
|
17
17
|
# @yieldparam [Row] a row in the CSV file.
|
18
18
|
# @return [Enumerator]
|
19
19
|
# If no block is given, an enumerator object will be returned.
|
@@ -38,8 +38,8 @@ module HoneyFormat
|
|
38
38
|
built_rows = []
|
39
39
|
rows.each do |row|
|
40
40
|
# ignore empty rows - the Ruby CSV library can return empty lines as [nil]
|
41
|
-
next if row.
|
42
|
-
built_rows << builder.build(
|
41
|
+
next if row.nil? || row.empty? || row == [nil]
|
42
|
+
built_rows << builder.build(row)
|
43
43
|
end
|
44
44
|
built_rows
|
45
45
|
end
|
data/lib/honey_format/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: honey_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jacob Burenstam
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-05-
|
11
|
+
date: 2018-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -117,7 +117,6 @@ files:
|
|
117
117
|
- bin/setup
|
118
118
|
- honey_format.gemspec
|
119
119
|
- lib/honey_format.rb
|
120
|
-
- lib/honey_format/columns.rb
|
121
120
|
- lib/honey_format/convert_header_value.rb
|
122
121
|
- lib/honey_format/csv.rb
|
123
122
|
- lib/honey_format/exceptions.rb
|
@@ -125,7 +124,6 @@ files:
|
|
125
124
|
- lib/honey_format/row.rb
|
126
125
|
- lib/honey_format/row_builder.rb
|
127
126
|
- lib/honey_format/rows.rb
|
128
|
-
- lib/honey_format/sanitize.rb
|
129
127
|
- lib/honey_format/version.rb
|
130
128
|
homepage: https://github.com/buren/honey_format
|
131
129
|
licenses:
|
data/lib/honey_format/columns.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
require 'honey_format/convert_header_value'
|
2
|
-
|
3
|
-
module HoneyFormat
|
4
|
-
# Represents columns.
|
5
|
-
class Columns
|
6
|
-
# @return [Columns] a new instance of Columns.
|
7
|
-
# @param [Array] header array of strings.
|
8
|
-
# @param [Array] valid array of symbols representing valid columns.
|
9
|
-
# @raise [MissingCSVHeaderColumnError] raised when header is missing
|
10
|
-
# @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
|
11
|
-
def initialize(header, valid: :all, converter: ConvertHeaderValue)
|
12
|
-
@converter = converter
|
13
|
-
@columns = build_columns(header, valid)
|
14
|
-
end
|
15
|
-
|
16
|
-
# Returns columns as array.
|
17
|
-
# @return [Array] of columns.
|
18
|
-
def to_a
|
19
|
-
@columns
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def build_columns(header, valid)
|
25
|
-
header.map do |column|
|
26
|
-
column = @converter.call(column.dup)
|
27
|
-
validate_column_presence!(column)
|
28
|
-
|
29
|
-
validate_column_name!(column, valid)
|
30
|
-
column
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def validate_column_presence!(col)
|
35
|
-
if col.nil? || col.empty?
|
36
|
-
fail(MissingCSVHeaderColumnError, "CSV header column can't be empty.")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def validate_column_name!(column, valid)
|
41
|
-
return if valid == :all
|
42
|
-
|
43
|
-
valid.include?(column) ||
|
44
|
-
begin
|
45
|
-
err_msg = "column :#{column} not in #{valid.inspect}"
|
46
|
-
fail(UnknownCSVHeaderColumnError, err_msg)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
module HoneyFormat
|
2
|
-
# Utility class for sanitizing various simple data types.
|
3
|
-
class Sanitize
|
4
|
-
# Returns array of cleaned strings.
|
5
|
-
# @return [Array<String>] the cleaned array of strings.
|
6
|
-
# @param [Array<String>] row the array of strings to be cleaned.
|
7
|
-
# @example Sanitize array
|
8
|
-
# Sanitize.array([" a "]) #=> ["a"]
|
9
|
-
def self.array(row)
|
10
|
-
row.map { |column| string(column) }
|
11
|
-
end
|
12
|
-
|
13
|
-
# Returns array of cleaned elements.
|
14
|
-
# @return [String] the cleaned array.
|
15
|
-
# @param [String] column the string to be cleaned.
|
16
|
-
# @example Sanitize string
|
17
|
-
# Sanitize.string(" a ") #=> "a"
|
18
|
-
# @example Sanitize nil
|
19
|
-
# Sanitize.string(nil) #=> nil
|
20
|
-
def self.string(column)
|
21
|
-
return column if column.nil?
|
22
|
-
column.strip
|
23
|
-
end
|
24
|
-
|
25
|
-
# Returns mutated array of cleaned strings.
|
26
|
-
# @return [Array<String>] the cleaned array of strings.
|
27
|
-
# @param [Array<String>] row the array of strings to be cleaned.
|
28
|
-
# @example Sanitize array
|
29
|
-
# Sanitize.array!([" a "]) #=> ["a"]
|
30
|
-
def self.array!(row)
|
31
|
-
row.map! { |column| string!(column) }
|
32
|
-
end
|
33
|
-
|
34
|
-
# Returns mutated and cleaned string.
|
35
|
-
# @return [String] the cleaned array.
|
36
|
-
# @param [String] column the string to be cleaned.
|
37
|
-
# @example Sanitize string
|
38
|
-
# Sanitize.string!(" a ") #=> "a"
|
39
|
-
# @example Sanitize nil
|
40
|
-
# Sanitize.string!(nil) #=> nil
|
41
|
-
def self.string!(column)
|
42
|
-
return if column.nil?
|
43
|
-
column.tap(&:strip!)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|