honey_format 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6ec17df72ebcb22be2365a454c19632f6ee22becc155a525bf66a010f7eec4b2
4
- data.tar.gz: b05efbc59d1af2da443fd4c3e72cf573d9fd43b292b644336299411e705a77e3
3
+ metadata.gz: 1c3b35ed8d8cac5eace438a1eb5365b4970d96e2fee1a3faece4555eae44f1a3
4
+ data.tar.gz: 01ed3a3b5bc973b5882c6ec2f61c647bf24d0edadd76c7155568e2a6c2cbcbbb
5
5
  SHA512:
6
- metadata.gz: b802e64d2033b5ea9881b30142e88f9d326e7d89416deadd7aa3c2f49c6da3f5b7a03fda057a802a9cebff28c72d5c75cfe48e9fa3a93e68ac808beb480b3d42
7
- data.tar.gz: 71682b5a17dae461bbdabece49219e76be07241494b3606849ec5e7198bcf9e6f8e470a0e5a857ff879d5cc48ea796214a93d3ac0793cffb3feb29842e15486e
6
+ metadata.gz: af026b70d5fa369633788e5be0796723161b8564aca45084e704fa0691ad7da1fbe1a4578262dec60806d71fe1757984630c8d3e7be1fc4df276b0c3bf3cac1f
7
+ data.tar.gz: 267b348cff25cffb1204e26ec4f34f60fafb4154b44afa6005599bfd71ab8946024be7fd56deca52ff5c5ee31f53fe4f4ab79fd25ead5df0ad79ece91cfdeb5d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # v0.7.0
2
+
3
+ - Don't sanitize each row :rocket: (improves performance from ~1.4x slower than raw CSV to ~1.1x slower)
4
+ - Fold `Columns` class into `Header`
5
+ - Remove `Sanitize` class
6
+
1
7
  # v0.6.0
2
8
 
3
9
  * Add `CSV#to_csv` ([PR#2](https://github.com/buren/honey_format/pull/2))
data/README.md CHANGED
@@ -49,11 +49,30 @@ user.id # => "1"
49
49
  user.username # => "buren"
50
50
  ```
51
51
 
52
- Custom row builder
52
+ Minimal custom row builder
53
53
  ```ruby
54
54
  csv_string = "Id, Username\n 1, buren"
55
- upcase_builder = ->(o) { o.is_a?(String) ? o.upcase : o }
56
- csv = HoneyFormat::CSV.new(csv_string, row_builder: upcase_builder)
55
+ upcaser = ->(row) { row.username.upcase!; row }
56
+ csv = HoneyFormat::CSV.new(csv_string, row_builder: upcaser)
57
+ csv.rows # => [#<struct id="1", username="BUREN">]
58
+ ```
59
+
60
+ Complete custom row builder
61
+ ```ruby
62
+ class Anonymizer
63
+ def self.call(row)
64
+ # Return an object you want to represent the row
65
+ row.tap do |r|
66
+ r.name = '<anon>'
67
+ r.email = '<anon>'
68
+ r.ssn = '<anon>'
69
+ r.payment_id = '<scrubbed>'
70
+ end
71
+ end
72
+ end
73
+
74
+ csv_string = "Id, Username\n 1, buren"
75
+ csv = HoneyFormat::CSV.new(csv_string, row_builder: Anonymizer)
57
76
  csv.rows # => [#<struct id="1", username="BUREN">]
58
77
  ```
59
78
 
@@ -65,6 +84,11 @@ csv.rows.each { |row| row.id = nil }
65
84
  csv.to_csv # => "Id, Username\n,buren\n"
66
85
  ```
67
86
 
87
+ You can of course set the delimiter
88
+ ```ruby
89
+ HoneyFormat::CSV.new(csv_string, delimiter: ';')
90
+ ```
91
+
68
92
  Validate CSV header
69
93
  ```ruby
70
94
  csv_string = "Id, Username\n 1, buren"
@@ -85,17 +109,29 @@ csv.rows.first.username # => "buren"
85
109
  ```
86
110
 
87
111
  If your header contains special chars and/or chars that can't be part of Ruby method names,
88
- things get a little awkward..
112
+ things can get a little awkward..
89
113
  ```ruby
90
114
  csv_string = "ÅÄÖ\nSwedish characters"
91
115
  user = HoneyFormat::CSV.new(csv_string).rows.first
92
- # Note that these chars aren't "downcased",
116
+ # Note that these chars aren't "downcased" in Ruby 2.3 and older versions of Ruby,
93
117
  # "ÅÄÖ".downcase # => "ÅÄÖ"
94
118
  user.ÅÄÖ # => "Swedish characters"
119
+ # while on Ruby > 2.3
120
+ user.åäö
95
121
 
96
- csv_string = "First-Name\nJacob"
122
+ csv_string = "First^Name\nJacob"
97
123
  user = HoneyFormat::CSV.new(csv_string).rows.first
98
- user.public_send(:"first-name") # => "Jacob"
124
+ user.public_send(:"first^name") # => "Jacob"
125
+ ```
126
+
127
+ Pass your own header converter
128
+ ```ruby
129
+ map = { 'First^Name' => :first_name }
130
+ converter = ->(column) { map.fetch(column, column) }
131
+
132
+ csv_string = "First^Name\nJacob"
133
+ user = HoneyFormat::CSV.new(csv_string, header_converter: converter).rows.first
134
+ user.first_name # => "Jacob"
99
135
  ```
100
136
 
101
137
  If you want to see more usage examples check out the `spec/` directory.
@@ -113,15 +149,15 @@ $ bin/benchmark file.csv
113
149
  204KB (1k lines)
114
150
 
115
151
  ```
116
- stdlib CSV: 48.9 i/s
117
- HoneyFormat::CSV: 34.5 i/s - 1.41x slower
152
+ stdlib CSV: 51.9 i/s
153
+ HoneyFormat::CSV: 49.6 i/s - 1.05x slower
118
154
  ```
119
155
 
120
156
  19MB (100k lines)
121
157
 
122
158
  ```
123
- stdlib CSV: 0.4 i/s
124
- HoneyFormat::CSV: 0.3 i/s - 1.41x slower
159
+ stdlib CSV: 0.4 i/s
160
+ HoneyFormat::CSV: 0.4 i/s - 1.11x slower
125
161
  ```
126
162
 
127
163
  ## Development
@@ -1,6 +1,5 @@
1
1
  require 'csv'
2
2
 
3
- require 'honey_format/sanitize'
4
3
  require 'honey_format/rows'
5
4
  require 'honey_format/header'
6
5
 
@@ -9,27 +8,29 @@ module HoneyFormat
9
8
  class CSV
10
9
  # @return [CSV] a new instance of CSV.
11
10
  # @param [String] csv string.
12
- # @param [Array] valid_columns valid array of symbols representing valid columns.
13
- # @param [Array] header optional argument for CSV header
11
+ # @param [Array<Symbol>] valid_columns array of symbols representing valid columns; if empty, all columns are considered valid.
12
+ # @param [Array<String>] header optional argument for CSV header
14
13
  # @param [#call] row_builder will be called for each parsed row
15
14
  # @raise [MissingCSVHeaderError] raised when header is missing (empty or nil).
16
15
  # @raise [MissingCSVHeaderColumnError] raised when header column is missing.
17
16
  # @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
18
- def initialize(csv, delimiter: ',', header: nil, valid_columns: :all, header_converter: ConvertHeaderValue, row_builder: nil)
17
+ def initialize(csv, delimiter: ',', header: nil, valid_columns: [], header_converter: ConvertHeaderValue, row_builder: nil)
19
18
  csv = ::CSV.parse(csv, col_sep: delimiter)
20
19
  header_row = header || csv.shift
21
20
  @header = Header.new(header_row, valid: valid_columns, converter: header_converter)
22
21
  @rows = Rows.new(csv, columns, builder: row_builder)
23
22
  end
24
23
 
25
- # @return [Array] of strings for sanitized header.
24
+ # Original CSV header
25
+ # @return [Array<String>] of strings for sanitized header.
26
26
  def header
27
- @header.column_names
27
+ @header.original
28
28
  end
29
29
 
30
- # @return [Array] of column identifiers.
30
+ # CSV columns
31
+ # @return [Array<Symbol>] of column identifiers.
31
32
  def columns
32
- @header.columns
33
+ @header.to_a
33
34
  end
34
35
 
35
36
  # @return [Array] of rows.
@@ -38,11 +39,16 @@ module HoneyFormat
38
39
  @rows
39
40
  end
40
41
 
41
- # @yield [row] block to receive the row.
42
+ # @yield [row] The given block will be passed for every row.
43
+ # @yieldparam [Row] a row in the CSV file.
44
+ # @return [Enumerator] If no block is given, an enumerator object will be returned.
42
45
  def each_row
46
+ return rows.each unless block_given?
47
+
43
48
  rows.each { |row| yield(row) }
44
49
  end
45
50
 
51
+ # Convert the CSV object to a CSV-string.
46
52
  # @return [String] CSV-string representation.
47
53
  def to_csv
48
54
  header.to_csv + @rows.to_csv
@@ -1,32 +1,79 @@
1
- require 'honey_format/columns'
1
+ require 'honey_format/convert_header_value'
2
2
 
3
3
  module HoneyFormat
4
4
  # Represents a header
5
5
  class Header
6
- attr_reader :column_names
6
+ include Enumerable
7
7
 
8
+ # Instantiate a Header
8
9
  # @return [Header] a new instance of Header.
9
- # @param [Array] header array of strings.
10
- # @param [Array] valid array of symbols representing valid columns.
11
- # @raise [MissingCSVHeaderError] raised when header is missing (empty or nil).
12
- def initialize(header, valid: :all, converter: ConvertHeaderValue)
10
+ # @param [Array<String>] header array of strings.
11
+ # @param [Array<Symbol>] valid array of symbols representing valid columns; if empty, all columns are considered valid.
12
+ # @param converter [#call] header converter that implements a #call method that takes one column (string) argument.
13
+ # @raise [MissingCSVHeaderColumnError] raised when a header column is missing (nil or empty).
14
+ # @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
15
+ # @example Instantiate a header with a custom converter
16
+ # converter = ->(col) { col == 'username' ? 'handle' : col }
17
+ # header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
18
+ # header.to_a # => ['name', 'handle']
19
+ def initialize(header, valid: [], converter: ConvertHeaderValue)
13
20
  if header.nil? || header.empty?
14
- fail(MissingCSVHeaderError, "CSV header can't be empty.")
21
+ raise(MissingCSVHeaderError, "CSV header can't be empty.")
15
22
  end
16
23
 
17
- @column_names = Sanitize.array(header)
18
- @columns = Columns.new(@column_names, valid: valid, converter: converter)
24
+ @original_header = header.map { |col| col ? col.strip : nil }
25
+ @converter = converter
26
+ @columns = build_columns(@original_header, valid)
27
+ end
28
+
29
+ # @return [Array<String>] the original header
30
+ def original
31
+ @original_header
32
+ end
33
+
34
+ # @yield [row] The given block will be passed for every column.
35
+ # @yieldparam [Row] a column in the CSV header.
36
+ # @return [Enumerator]
37
+ # If no block is given, an enumerator object will be returned.
38
+ def each(&block)
39
+ @columns.each(&block)
19
40
  end
20
41
 
21
42
  # Returns columns as array.
22
- # @return [Array] of columns.
23
- def columns
24
- @columns.to_a
43
+ # @return [Array<Symbol>] of columns.
44
+ def to_a
45
+ @columns
25
46
  end
26
47
 
48
+ # Header as CSV-string
27
49
  # @return [String] CSV-string representation.
28
50
  def to_csv
29
- columns.to_csv
51
+ @columns.to_csv
52
+ end
53
+
54
+ private
55
+
56
+ # Convert original header
57
+ # @param [Array<String>] header the original header
58
+ # @param [Array<Symbol>] valid list of valid column names; if empty, all columns are considered valid.
59
+ # @return [Array<String>] converted columns
60
+ def build_columns(header, valid)
61
+ valid = valid.map(&:to_sym)
62
+
63
+ header.map do |column|
64
+ column = @converter.call(column.dup)
65
+
66
+ if column.nil? || column.empty?
67
+ raise(MissingCSVHeaderColumnError, "CSV header column can't be empty.")
68
+ end
69
+
70
+ if valid.any? && !valid.include?(column)
71
+ err_msg = "column :#{column} not in #{valid.inspect}"
72
+ raise(UnknownCSVHeaderColumnError, err_msg)
73
+ end
74
+
75
+ column
76
+ end
30
77
  end
31
78
  end
32
79
  end
@@ -11,9 +11,13 @@ module HoneyFormat
11
11
  # @example Create new row
12
12
  # Row.new!([:id])
13
13
  def initialize(columns, builder: nil)
14
- validate_columns!(columns)
14
+ if columns.empty?
15
+ err_msg = 'Expected array with at least one element, but was empty.'
16
+ raise(EmptyColumnsError, err_msg)
17
+ end
18
+
15
19
  @row_builder = RowBuilder.new(*columns)
16
- @builder = builder || ->(row) { row }
20
+ @builder = builder
17
21
  @columns = columns
18
22
  end
19
23
 
@@ -26,27 +30,15 @@ module HoneyFormat
26
30
  # r.build(['1']).id #=> '1'
27
31
  def build(row)
28
32
  built_row = @row_builder.call(row)
33
+ return built_row unless @builder
29
34
  @builder.call(built_row)
30
35
  rescue ArgumentError, 'struct size differs'
31
- fail_for_struct_size_diff!(row)
32
- end
33
-
34
- private
35
-
36
- def validate_columns!(columns)
37
- return unless columns.empty?
38
-
39
- err_msg = 'Expected array with at least one element, but was empty.'
40
- fail(EmptyColumnsError, err_msg)
41
- end
42
-
43
- def fail_for_struct_size_diff!(row)
44
36
  err_msg = [
45
37
  "Row length #{row.length}",
46
38
  "for columns #{@columns.length}",
47
39
  "row: #{row.inspect}"
48
40
  ].join(', ')
49
- fail(InvalidRowLengthError, err_msg)
41
+ raise(InvalidRowLengthError, err_msg)
50
42
  end
51
43
  end
52
44
  end
@@ -1,11 +1,13 @@
1
1
  module HoneyFormat
2
2
  # Default row builder
3
3
  class RowBuilder < Struct
4
+ # Create a row
4
5
  # @return [Struct] returns an instantiated Struct representing a row
5
6
  def self.call(row)
6
7
  new(*row)
7
8
  end
8
9
 
10
+ # Represent row as CSV
9
11
  # @return [String] CSV-string representation.
10
12
  def to_csv
11
13
  members.map do |column_name|
@@ -13,7 +13,7 @@ module HoneyFormat
13
13
  @rows = prepare_rows(Row.new(columns, builder: builder), rows)
14
14
  end
15
15
 
16
- # @yield [row] The given block will be passed every row.
16
+ # @yield [row] The given block will be passed for every row.
17
17
  # @yieldparam [Row] a row in the CSV file.
18
18
  # @return [Enumerator]
19
19
  # If no block is given, an enumerator object will be returned.
@@ -38,8 +38,8 @@ module HoneyFormat
38
38
  built_rows = []
39
39
  rows.each do |row|
40
40
  # ignore empty rows - the Ruby CSV library can return empty lines as [nil]
41
- next if row.empty? || row.nil? || row == [nil]
42
- built_rows << builder.build(Sanitize.array!(row))
41
+ next if row.nil? || row.empty? || row == [nil]
42
+ built_rows << builder.build(row)
43
43
  end
44
44
  built_rows
45
45
  end
@@ -1,4 +1,4 @@
1
1
  module HoneyFormat
2
2
  # Gem version
3
- VERSION = '0.6.0'
3
+ VERSION = '0.7.0'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: honey_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-11 00:00:00.000000000 Z
11
+ date: 2018-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -117,7 +117,6 @@ files:
117
117
  - bin/setup
118
118
  - honey_format.gemspec
119
119
  - lib/honey_format.rb
120
- - lib/honey_format/columns.rb
121
120
  - lib/honey_format/convert_header_value.rb
122
121
  - lib/honey_format/csv.rb
123
122
  - lib/honey_format/exceptions.rb
@@ -125,7 +124,6 @@ files:
125
124
  - lib/honey_format/row.rb
126
125
  - lib/honey_format/row_builder.rb
127
126
  - lib/honey_format/rows.rb
128
- - lib/honey_format/sanitize.rb
129
127
  - lib/honey_format/version.rb
130
128
  homepage: https://github.com/buren/honey_format
131
129
  licenses:
@@ -1,50 +0,0 @@
1
- require 'honey_format/convert_header_value'
2
-
3
- module HoneyFormat
4
- # Represents columns.
5
- class Columns
6
- # @return [Columns] a new instance of Columns.
7
- # @param [Array] header array of strings.
8
- # @param [Array] valid array of symbols representing valid columns.
9
- # @raise [MissingCSVHeaderColumnError] raised when header is missing
10
- # @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
11
- def initialize(header, valid: :all, converter: ConvertHeaderValue)
12
- @converter = converter
13
- @columns = build_columns(header, valid)
14
- end
15
-
16
- # Returns columns as array.
17
- # @return [Array] of columns.
18
- def to_a
19
- @columns
20
- end
21
-
22
- private
23
-
24
- def build_columns(header, valid)
25
- header.map do |column|
26
- column = @converter.call(column.dup)
27
- validate_column_presence!(column)
28
-
29
- validate_column_name!(column, valid)
30
- column
31
- end
32
- end
33
-
34
- def validate_column_presence!(col)
35
- if col.nil? || col.empty?
36
- fail(MissingCSVHeaderColumnError, "CSV header column can't be empty.")
37
- end
38
- end
39
-
40
- def validate_column_name!(column, valid)
41
- return if valid == :all
42
-
43
- valid.include?(column) ||
44
- begin
45
- err_msg = "column :#{column} not in #{valid.inspect}"
46
- fail(UnknownCSVHeaderColumnError, err_msg)
47
- end
48
- end
49
- end
50
- end
@@ -1,46 +0,0 @@
1
- module HoneyFormat
2
- # Utility class for sanitizing various simple data types.
3
- class Sanitize
4
- # Returns array of cleaned strings.
5
- # @return [Array<String>] the cleaned array of strings.
6
- # @param [Array<String>] row the array of strings to be cleaned.
7
- # @example Sanitize array
8
- # Sanitize.array([" a "]) #=> ["a"]
9
- def self.array(row)
10
- row.map { |column| string(column) }
11
- end
12
-
13
- # Returns array of cleaned elements.
14
- # @return [String] the cleaned array.
15
- # @param [String] column the string to be cleaned.
16
- # @example Sanitize string
17
- # Sanitize.string(" a ") #=> "a"
18
- # @example Sanitize nil
19
- # Sanitize.string(nil) #=> nil
20
- def self.string(column)
21
- return column if column.nil?
22
- column.strip
23
- end
24
-
25
- # Returns mutated array of cleaned strings.
26
- # @return [Array<String>] the cleaned array of strings.
27
- # @param [Array<String>] row the array of strings to be cleaned.
28
- # @example Sanitize array
29
- # Sanitize.array!([" a "]) #=> ["a"]
30
- def self.array!(row)
31
- row.map! { |column| string!(column) }
32
- end
33
-
34
- # Returns mutated and cleaned string.
35
- # @return [String] the cleaned array.
36
- # @param [String] column the string to be cleaned.
37
- # @example Sanitize string
38
- # Sanitize.string!(" a ") #=> "a"
39
- # @example Sanitize nil
40
- # Sanitize.string!(nil) #=> nil
41
- def self.string!(column)
42
- return if column.nil?
43
- column.tap(&:strip!)
44
- end
45
- end
46
- end