honey_format 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +47 -11
- data/lib/honey_format/csv.rb +15 -9
- data/lib/honey_format/header.rb +60 -13
- data/lib/honey_format/row.rb +8 -16
- data/lib/honey_format/row_builder.rb +2 -0
- data/lib/honey_format/rows.rb +3 -3
- data/lib/honey_format/version.rb +1 -1
- metadata +2 -4
- data/lib/honey_format/columns.rb +0 -50
- data/lib/honey_format/sanitize.rb +0 -46
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c3b35ed8d8cac5eace438a1eb5365b4970d96e2fee1a3faece4555eae44f1a3
|
4
|
+
data.tar.gz: 01ed3a3b5bc973b5882c6ec2f61c647bf24d0edadd76c7155568e2a6c2cbcbbb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af026b70d5fa369633788e5be0796723161b8564aca45084e704fa0691ad7da1fbe1a4578262dec60806d71fe1757984630c8d3e7be1fc4df276b0c3bf3cac1f
|
7
|
+
data.tar.gz: 267b348cff25cffb1204e26ec4f34f60fafb4154b44afa6005599bfd71ab8946024be7fd56deca52ff5c5ee31f53fe4f4ab79fd25ead5df0ad79ece91cfdeb5d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# v0.7.0
|
2
|
+
|
3
|
+
- Don't sanitize each row :rocket: (improves performance from ~1.4x slower than raw CSV to ~1.1x slower)
|
4
|
+
- Fold `Columns` class into `Header`
|
5
|
+
- Remove `Sanitize` class
|
6
|
+
|
1
7
|
# v0.6.0
|
2
8
|
|
3
9
|
* Add `CSV#to_csv` ([PR#2](https://github.com/buren/honey_format/pull/2))
|
data/README.md
CHANGED
@@ -49,11 +49,30 @@ user.id # => "1"
|
|
49
49
|
user.username # => "buren"
|
50
50
|
```
|
51
51
|
|
52
|
-
|
52
|
+
Minimal custom row builder
|
53
53
|
```ruby
|
54
54
|
csv_string = "Id, Username\n 1, buren"
|
55
|
-
|
56
|
-
csv = HoneyFormat::CSV.new(csv_string, row_builder:
|
55
|
+
upcaser = ->(row) { row.username.upcase!; row }
|
56
|
+
csv = HoneyFormat::CSV.new(csv_string, row_builder: upcaser)
|
57
|
+
csv.rows # => [#<struct id="1", username="BUREN">]
|
58
|
+
```
|
59
|
+
|
60
|
+
Complete custom row builder
|
61
|
+
```ruby
|
62
|
+
class Anonymizer
|
63
|
+
def self.call(row)
|
64
|
+
# Return an object you want to represent the row
|
65
|
+
row.tap do |r|
|
66
|
+
r.name = '<anon>'
|
67
|
+
r.email = '<anon>'
|
68
|
+
r.ssn = '<anon>'
|
69
|
+
r.payment_id = '<scrubbed>'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
csv_string = "Id, Username\n 1, buren"
|
75
|
+
csv = HoneyFormat::CSV.new(csv_string, row_builder: Anonymizer)
|
57
76
|
csv.rows # => [#<struct id="1", username="BUREN">]
|
58
77
|
```
|
59
78
|
|
@@ -65,6 +84,11 @@ csv.rows.each { |row| row.id = nil }
|
|
65
84
|
csv.to_csv # => "Id, Username\n,buren\n"
|
66
85
|
```
|
67
86
|
|
87
|
+
You can of course set the delimiter
|
88
|
+
```ruby
|
89
|
+
HoneyFormat::CSV.new(csv_string, delimiter: ';')
|
90
|
+
```
|
91
|
+
|
68
92
|
Validate CSV header
|
69
93
|
```ruby
|
70
94
|
csv_string = "Id, Username\n 1, buren"
|
@@ -85,17 +109,29 @@ csv.rows.first.username # => "buren"
|
|
85
109
|
```
|
86
110
|
|
87
111
|
If your header contains special chars and/or chars that can't be part of Ruby method names,
|
88
|
-
things get a little awkward..
|
112
|
+
things can get a little awkward..
|
89
113
|
```ruby
|
90
114
|
csv_string = "ÅÄÖ\nSwedish characters"
|
91
115
|
user = HoneyFormat::CSV.new(csv_string).rows.first
|
92
|
-
# Note that these chars aren't "downcased",
|
116
|
+
# Note that these chars aren't "downcased" in Ruby 2.3 and older versions of Ruby,
|
93
117
|
# "ÅÄÖ".downcase # => "ÅÄÖ"
|
94
118
|
user.ÅÄÖ # => "Swedish characters"
|
119
|
+
# while on Ruby > 2.3
|
120
|
+
user.åäö
|
95
121
|
|
96
|
-
csv_string = "First
|
122
|
+
csv_string = "First^Name\nJacob"
|
97
123
|
user = HoneyFormat::CSV.new(csv_string).rows.first
|
98
|
-
user.public_send(:"first
|
124
|
+
user.public_send(:"first^name") # => "Jacob"
|
125
|
+
```
|
126
|
+
|
127
|
+
Pass your own header converter
|
128
|
+
```ruby
|
129
|
+
map = { 'First^Name' => :first_name }
|
130
|
+
converter = ->(column) { map.fetch(column, column) }
|
131
|
+
|
132
|
+
csv_string = "First^Name\nJacob"
|
133
|
+
user = HoneyFormat::CSV.new(csv_string, header_converter: converter).rows.first
|
134
|
+
user.first_name # => "Jacob"
|
99
135
|
```
|
100
136
|
|
101
137
|
If you want to see more usage examples check out the `spec/` directory.
|
@@ -113,15 +149,15 @@ $ bin/benchmark file.csv
|
|
113
149
|
204KB (1k lines)
|
114
150
|
|
115
151
|
```
|
116
|
-
stdlib CSV:
|
117
|
-
HoneyFormat::CSV:
|
152
|
+
stdlib CSV: 51.9 i/s
|
153
|
+
HoneyFormat::CSV: 49.6 i/s - 1.05x slower
|
118
154
|
```
|
119
155
|
|
120
156
|
19MB (100k lines)
|
121
157
|
|
122
158
|
```
|
123
|
-
stdlib CSV:
|
124
|
-
HoneyFormat::CSV:
|
159
|
+
stdlib CSV: 0.4 i/s
|
160
|
+
HoneyFormat::CSV: 0.4 i/s - 1.11x slower
|
125
161
|
```
|
126
162
|
|
127
163
|
## Development
|
data/lib/honey_format/csv.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'csv'
|
2
2
|
|
3
|
-
require 'honey_format/sanitize'
|
4
3
|
require 'honey_format/rows'
|
5
4
|
require 'honey_format/header'
|
6
5
|
|
@@ -9,27 +8,29 @@ module HoneyFormat
|
|
9
8
|
class CSV
|
10
9
|
# @return [CSV] a new instance of CSV.
|
11
10
|
# @param [String] csv string.
|
12
|
-
# @param [Array] valid_columns valid array of symbols representing valid columns.
|
13
|
-
# @param [Array] header optional argument for CSV header
|
11
|
+
# @param [Array<Symbol>] valid_columns array of symbols representing valid columns; if empty, all columns are considered valid.
|
12
|
+
# @param [Array<String>] header optional argument for CSV header
|
14
13
|
# @param [#call] row_builder will be called for each parsed row
|
15
14
|
# @raise [MissingCSVHeaderError] raised when header is missing (empty or nil).
|
16
15
|
# @raise [MissingCSVHeaderColumnError] raised when header column is missing.
|
17
16
|
# @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
|
18
|
-
def initialize(csv, delimiter: ',', header: nil, valid_columns:
|
17
|
+
def initialize(csv, delimiter: ',', header: nil, valid_columns: [], header_converter: ConvertHeaderValue, row_builder: nil)
|
19
18
|
csv = ::CSV.parse(csv, col_sep: delimiter)
|
20
19
|
header_row = header || csv.shift
|
21
20
|
@header = Header.new(header_row, valid: valid_columns, converter: header_converter)
|
22
21
|
@rows = Rows.new(csv, columns, builder: row_builder)
|
23
22
|
end
|
24
23
|
|
25
|
-
#
|
24
|
+
# Original CSV header
|
25
|
+
# @return [Array<String>] of strings for sanitized header.
|
26
26
|
def header
|
27
|
-
@header.
|
27
|
+
@header.original
|
28
28
|
end
|
29
29
|
|
30
|
-
#
|
30
|
+
# CSV columns
|
31
|
+
# @return [Array<Symbol>] of column identifiers.
|
31
32
|
def columns
|
32
|
-
@header.
|
33
|
+
@header.to_a
|
33
34
|
end
|
34
35
|
|
35
36
|
# @return [Array] of rows.
|
@@ -38,11 +39,16 @@ module HoneyFormat
|
|
38
39
|
@rows
|
39
40
|
end
|
40
41
|
|
41
|
-
# @yield [row] block
|
42
|
+
# @yield [row] The given block will be passed for every row.
|
43
|
+
# @yieldparam [Row] a row in the CSV file.
|
44
|
+
# @return [Enumerator] If no block is given, an enumerator object will be returned.
|
42
45
|
def each_row
|
46
|
+
return rows.each unless block_given?
|
47
|
+
|
43
48
|
rows.each { |row| yield(row) }
|
44
49
|
end
|
45
50
|
|
51
|
+
# Convert the CSV object to a CSV-string.
|
46
52
|
# @return [String] CSV-string representation.
|
47
53
|
def to_csv
|
48
54
|
header.to_csv + @rows.to_csv
|
data/lib/honey_format/header.rb
CHANGED
@@ -1,32 +1,79 @@
|
|
1
|
-
require 'honey_format/
|
1
|
+
require 'honey_format/convert_header_value'
|
2
2
|
|
3
3
|
module HoneyFormat
|
4
4
|
# Represents a header
|
5
5
|
class Header
|
6
|
-
|
6
|
+
include Enumerable
|
7
7
|
|
8
|
+
# Instantiate a Header
|
8
9
|
# @return [Header] a new instance of Header.
|
9
|
-
# @param [Array] header array of strings.
|
10
|
-
# @param [Array] valid array of symbols representing valid columns.
|
11
|
-
# @
|
12
|
-
|
10
|
+
# @param [Array<String>] header array of strings.
|
11
|
+
# @param [Array<Symbol>] valid array of symbols representing valid columns; if empty, all columns are considered valid.
|
12
|
+
# @param converter [#call] header converter that implements a #call method that takes one column (string) argument.
|
13
|
+
# @raise [MissingCSVHeaderColumnError] raised when header is missing
|
14
|
+
# @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
|
15
|
+
# @example Instantiate a header with a custom converter
|
16
|
+
# converter = ->(col) { col == 'username' ? 'handle' : col }
|
17
|
+
# header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
|
18
|
+
# header.to_a # => ['name', 'handle']
|
19
|
+
def initialize(header, valid: [], converter: ConvertHeaderValue)
|
13
20
|
if header.nil? || header.empty?
|
14
|
-
|
21
|
+
raise(MissingCSVHeaderError, "CSV header can't be empty.")
|
15
22
|
end
|
16
23
|
|
17
|
-
@
|
18
|
-
@
|
24
|
+
@original_header = header.map { |col| col ? col.strip : nil }
|
25
|
+
@converter = converter
|
26
|
+
@columns = build_columns(@original_header, valid)
|
27
|
+
end
|
28
|
+
|
29
|
+
# @return [Array<String>] the original header
|
30
|
+
def original
|
31
|
+
@original_header
|
32
|
+
end
|
33
|
+
|
34
|
+
# @yield [row] The given block will be passed for every column.
|
35
|
+
# @yieldparam [Row] a column in the CSV header.
|
36
|
+
# @return [Enumerator]
|
37
|
+
# If no block is given, an enumerator object will be returned.
|
38
|
+
def each(&block)
|
39
|
+
@columns.each(&block)
|
19
40
|
end
|
20
41
|
|
21
42
|
# Returns columns as array.
|
22
|
-
# @return [Array] of columns.
|
23
|
-
def
|
24
|
-
@columns
|
43
|
+
# @return [Array<Symbol>] of columns.
|
44
|
+
def to_a
|
45
|
+
@columns
|
25
46
|
end
|
26
47
|
|
48
|
+
# Header as CSV-string
|
27
49
|
# @return [String] CSV-string representation.
|
28
50
|
def to_csv
|
29
|
-
columns.to_csv
|
51
|
+
@columns.to_csv
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Convert original header
|
57
|
+
# @param [Array<String>] header the original header
|
58
|
+
# @param [Array<Symbol>] valid list of valid column names; if empty, all columns are considered valid.
|
59
|
+
# @return [Array<String>] converted columns
|
60
|
+
def build_columns(header, valid)
|
61
|
+
valid = valid.map(&:to_sym)
|
62
|
+
|
63
|
+
header.map do |column|
|
64
|
+
column = @converter.call(column.dup)
|
65
|
+
|
66
|
+
if column.nil? || column.empty?
|
67
|
+
raise(MissingCSVHeaderColumnError, "CSV header column can't be empty.")
|
68
|
+
end
|
69
|
+
|
70
|
+
if valid.any? && !valid.include?(column)
|
71
|
+
err_msg = "column :#{column} not in #{valid.inspect}"
|
72
|
+
raise(UnknownCSVHeaderColumnError, err_msg)
|
73
|
+
end
|
74
|
+
|
75
|
+
column
|
76
|
+
end
|
30
77
|
end
|
31
78
|
end
|
32
79
|
end
|
data/lib/honey_format/row.rb
CHANGED
@@ -11,9 +11,13 @@ module HoneyFormat
|
|
11
11
|
# @example Create new row
|
12
12
|
# Row.new!([:id])
|
13
13
|
def initialize(columns, builder: nil)
|
14
|
-
|
14
|
+
if columns.empty?
|
15
|
+
err_msg = 'Expected array with at least one element, but was empty.'
|
16
|
+
raise(EmptyColumnsError, err_msg)
|
17
|
+
end
|
18
|
+
|
15
19
|
@row_builder = RowBuilder.new(*columns)
|
16
|
-
@builder = builder
|
20
|
+
@builder = builder
|
17
21
|
@columns = columns
|
18
22
|
end
|
19
23
|
|
@@ -26,27 +30,15 @@ module HoneyFormat
|
|
26
30
|
# r.build(['1']).id #=> '1'
|
27
31
|
def build(row)
|
28
32
|
built_row = @row_builder.call(row)
|
33
|
+
return built_row unless @builder
|
29
34
|
@builder.call(built_row)
|
30
35
|
rescue ArgumentError, 'struct size differs'
|
31
|
-
fail_for_struct_size_diff!(row)
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def validate_columns!(columns)
|
37
|
-
return unless columns.empty?
|
38
|
-
|
39
|
-
err_msg = 'Expected array with at least one element, but was empty.'
|
40
|
-
fail(EmptyColumnsError, err_msg)
|
41
|
-
end
|
42
|
-
|
43
|
-
def fail_for_struct_size_diff!(row)
|
44
36
|
err_msg = [
|
45
37
|
"Row length #{row.length}",
|
46
38
|
"for columns #{@columns.length}",
|
47
39
|
"row: #{row.inspect}"
|
48
40
|
].join(', ')
|
49
|
-
|
41
|
+
raise(InvalidRowLengthError, err_msg)
|
50
42
|
end
|
51
43
|
end
|
52
44
|
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
module HoneyFormat
|
2
2
|
# Default row builder
|
3
3
|
class RowBuilder < Struct
|
4
|
+
# Create a row
|
4
5
|
# @return [Struct] returns an instantiated Struct representing a row
|
5
6
|
def self.call(row)
|
6
7
|
new(*row)
|
7
8
|
end
|
8
9
|
|
10
|
+
# Represent row as CSV
|
9
11
|
# @return [String] CSV-string representation.
|
10
12
|
def to_csv
|
11
13
|
members.map do |column_name|
|
data/lib/honey_format/rows.rb
CHANGED
@@ -13,7 +13,7 @@ module HoneyFormat
|
|
13
13
|
@rows = prepare_rows(Row.new(columns, builder: builder), rows)
|
14
14
|
end
|
15
15
|
|
16
|
-
# @yield [row] The given block will be passed every row.
|
16
|
+
# @yield [row] The given block will be passed for every row.
|
17
17
|
# @yieldparam [Row] a row in the CSV file.
|
18
18
|
# @return [Enumerator]
|
19
19
|
# If no block is given, an enumerator object will be returned.
|
@@ -38,8 +38,8 @@ module HoneyFormat
|
|
38
38
|
built_rows = []
|
39
39
|
rows.each do |row|
|
40
40
|
# ignore empty rows - the Ruby CSV library can return empty lines as [nil]
|
41
|
-
next if row.
|
42
|
-
built_rows << builder.build(
|
41
|
+
next if row.nil? || row.empty? || row == [nil]
|
42
|
+
built_rows << builder.build(row)
|
43
43
|
end
|
44
44
|
built_rows
|
45
45
|
end
|
data/lib/honey_format/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: honey_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jacob Burenstam
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-05-
|
11
|
+
date: 2018-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -117,7 +117,6 @@ files:
|
|
117
117
|
- bin/setup
|
118
118
|
- honey_format.gemspec
|
119
119
|
- lib/honey_format.rb
|
120
|
-
- lib/honey_format/columns.rb
|
121
120
|
- lib/honey_format/convert_header_value.rb
|
122
121
|
- lib/honey_format/csv.rb
|
123
122
|
- lib/honey_format/exceptions.rb
|
@@ -125,7 +124,6 @@ files:
|
|
125
124
|
- lib/honey_format/row.rb
|
126
125
|
- lib/honey_format/row_builder.rb
|
127
126
|
- lib/honey_format/rows.rb
|
128
|
-
- lib/honey_format/sanitize.rb
|
129
127
|
- lib/honey_format/version.rb
|
130
128
|
homepage: https://github.com/buren/honey_format
|
131
129
|
licenses:
|
data/lib/honey_format/columns.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
require 'honey_format/convert_header_value'
|
2
|
-
|
3
|
-
module HoneyFormat
|
4
|
-
# Represents columns.
|
5
|
-
class Columns
|
6
|
-
# @return [Columns] a new instance of Columns.
|
7
|
-
# @param [Array] header array of strings.
|
8
|
-
# @param [Array] valid array of symbols representing valid columns.
|
9
|
-
# @raise [MissingCSVHeaderColumnError] raised when header is missing
|
10
|
-
# @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
|
11
|
-
def initialize(header, valid: :all, converter: ConvertHeaderValue)
|
12
|
-
@converter = converter
|
13
|
-
@columns = build_columns(header, valid)
|
14
|
-
end
|
15
|
-
|
16
|
-
# Returns columns as array.
|
17
|
-
# @return [Array] of columns.
|
18
|
-
def to_a
|
19
|
-
@columns
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def build_columns(header, valid)
|
25
|
-
header.map do |column|
|
26
|
-
column = @converter.call(column.dup)
|
27
|
-
validate_column_presence!(column)
|
28
|
-
|
29
|
-
validate_column_name!(column, valid)
|
30
|
-
column
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def validate_column_presence!(col)
|
35
|
-
if col.nil? || col.empty?
|
36
|
-
fail(MissingCSVHeaderColumnError, "CSV header column can't be empty.")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def validate_column_name!(column, valid)
|
41
|
-
return if valid == :all
|
42
|
-
|
43
|
-
valid.include?(column) ||
|
44
|
-
begin
|
45
|
-
err_msg = "column :#{column} not in #{valid.inspect}"
|
46
|
-
fail(UnknownCSVHeaderColumnError, err_msg)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
module HoneyFormat
|
2
|
-
# Utility class for sanitizing various simple data types.
|
3
|
-
class Sanitize
|
4
|
-
# Returns array of cleaned strings.
|
5
|
-
# @return [Array<String>] the cleaned array of strings.
|
6
|
-
# @param [Array<String>] row the array of strings to be cleaned.
|
7
|
-
# @example Sanitize array
|
8
|
-
# Sanitize.array([" a "]) #=> ["a"]
|
9
|
-
def self.array(row)
|
10
|
-
row.map { |column| string(column) }
|
11
|
-
end
|
12
|
-
|
13
|
-
# Returns array of cleaned elements.
|
14
|
-
# @return [String] the cleaned array.
|
15
|
-
# @param [String] column the string to be cleaned.
|
16
|
-
# @example Sanitize string
|
17
|
-
# Sanitize.string(" a ") #=> "a"
|
18
|
-
# @example Sanitize nil
|
19
|
-
# Sanitize.string(nil) #=> nil
|
20
|
-
def self.string(column)
|
21
|
-
return column if column.nil?
|
22
|
-
column.strip
|
23
|
-
end
|
24
|
-
|
25
|
-
# Returns mutated array of cleaned strings.
|
26
|
-
# @return [Array<String>] the cleaned array of strings.
|
27
|
-
# @param [Array<String>] row the array of strings to be cleaned.
|
28
|
-
# @example Sanitize array
|
29
|
-
# Sanitize.array!([" a "]) #=> ["a"]
|
30
|
-
def self.array!(row)
|
31
|
-
row.map! { |column| string!(column) }
|
32
|
-
end
|
33
|
-
|
34
|
-
# Returns mutated and cleaned string.
|
35
|
-
# @return [String] the cleaned array.
|
36
|
-
# @param [String] column the string to be cleaned.
|
37
|
-
# @example Sanitize string
|
38
|
-
# Sanitize.string!(" a ") #=> "a"
|
39
|
-
# @example Sanitize nil
|
40
|
-
# Sanitize.string!(nil) #=> nil
|
41
|
-
def self.string!(column)
|
42
|
-
return if column.nil?
|
43
|
-
column.tap(&:strip!)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|