honey_format 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,8 +8,6 @@ module HoneyFormat
8
8
  class MissingHeaderError < HeaderError; end
9
9
  # Raised when header column is missing
10
10
  class MissingHeaderColumnError < HeaderError; end
11
- # Raised when a column is not in passed valid columns
12
- class UnknownHeaderColumnError < HeaderError; end
13
11
 
14
12
  # Row errors
15
13
  # Super class of errors raised when there is a row error
@@ -18,6 +16,12 @@ module HoneyFormat
18
16
  class EmptyRowColumnsError < RowError; end
19
17
  # Raised when row has more columns than header columns
20
18
  class InvalidRowLengthError < RowError; end
19
+
20
+ # Value conversion errors
21
+ # Raised when value type is unknown
22
+ class UnknownValueTypeError < ArgumentError; end
23
+ # Raised when value type already exists
24
+ class ValueTypeExistsError < ArgumentError; end
21
25
  end
22
26
 
23
27
  include Errors
@@ -1,4 +1,4 @@
1
- require 'honey_format/convert_header_value'
1
+ require 'set'
2
2
 
3
3
  module HoneyFormat
4
4
  # Represents a header
@@ -8,23 +8,26 @@ module HoneyFormat
8
8
  # Instantiate a Header
9
9
  # @return [Header] a new instance of Header.
10
10
  # @param [Array<String>] header array of strings.
11
- # @param [Array<Symbol, String>] valid array representing the valid columns, if empty all columns will be considered valid.
12
- # @param converter [#call] header converter that implements a #call method that takes one column (string) argument.
11
+ # @param converter [#call, Symbol] header converter that implements a #call method that takes one column (string) argument OR symbol for a registered value converter.
13
12
  # @raise [HeaderError] super class of errors raised when there is a CSV header error.
14
13
  # @raise [MissingHeaderColumnError] raised when header is missing
15
- # @raise [UnknownHeaderColumnError] raised when column is not in valid list.
16
- # @example Instantiate a header with a customer converter
14
+ # @example Instantiate a header with a custom converter
17
15
  # converter = ->(col) { col == 'username' ? 'handle' : col }
18
16
  # header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
19
17
  # header.to_a # => ['name', 'handle']
20
- def initialize(header, valid: [], converter: ConvertHeaderValue)
18
+ def initialize(header, converter: HoneyFormat.header_converter)
21
19
  if header.nil? || header.empty?
22
20
  raise(Errors::MissingHeaderError, "CSV header can't be empty.")
23
21
  end
24
22
 
25
23
  @original_header = header
26
- @converter = converter
27
- @columns = build_columns(@original_header, valid)
24
+ @converter = if converter.is_a?(Symbol)
25
+ HoneyFormat.value_converter[converter]
26
+ else
27
+ converter
28
+ end
29
+
30
+ @columns = build_columns(@original_header)
28
31
  end
29
32
 
30
33
  # @return [Array<String>] the original header
@@ -32,6 +35,12 @@ module HoneyFormat
32
35
  @original_header
33
36
  end
34
37
 
38
+ # Reports whether the header has no columns.
39
+ # @return [true, false] true when the converted column list is empty, false otherwise.
40
+ def empty?
41
+ @columns.empty?
42
+ end
43
+
35
44
  # @yield [row] The given block will be passed for every column.
36
45
  # @yieldparam [Row] a column in the CSV header.
37
46
  # @return [Enumerator]
@@ -63,7 +72,7 @@ module HoneyFormat
63
72
  # @return [String] CSV-string representation.
64
73
  def to_csv(columns: nil)
65
74
  attributes = if columns
66
- self.columns & columns
75
+ self.columns & columns.map(&:to_sym)
67
76
  else
68
77
  self.columns
69
78
  end
@@ -76,24 +85,25 @@ module HoneyFormat
76
85
  # Convert original header
77
86
  # @param [Array<String>] header the original header
78
87
  # @return [Array<String>] converted columns
79
- def build_columns(header, valid)
80
- valid = valid.map(&:to_sym)
81
-
88
+ def build_columns(header)
82
89
  header.each_with_index.map do |header_column, index|
83
90
  convert_column(header_column, index).tap do |column|
84
91
  maybe_raise_missing_column!(column)
85
- maybe_raise_unknown_column!(column, valid)
86
92
  end
87
93
  end
88
94
  end
89
95
 
90
96
  # Convert the column value
91
- # @param [Object] column the CSV header column value
97
+ # @param [String, Symbol] column the CSV header column value
92
98
  # @param [Integer] index the CSV header column index
93
- # @return [Object] the converted object
99
+ # @return [Symbol] the converted column
94
100
  def convert_column(column, index)
95
- return @converter.call(column) if converter_arity == 1
96
- @converter.call(column, index)
101
+ value = if converter_arity == 1
102
+ @converter.call(column)
103
+ else
104
+ @converter.call(column, index)
105
+ end
106
+ value.to_sym
97
107
  end
98
108
 
99
109
  # Returns the converter#call method arity
@@ -104,18 +114,6 @@ module HoneyFormat
104
114
  @converter.method(:call).arity
105
115
  end
106
116
 
107
- # Raises an error if header column is unknown
108
- # @param [Object] column the CSV header column
109
- # @param [Array<Symbol, String>] valid CSV columns
110
- # @raise [Errors::UnknownHeaderColumnError]
111
- def maybe_raise_unknown_column!(column, valid)
112
- return if valid.empty?
113
- return if valid.include?(column)
114
-
115
- err_msg = "column :#{column} not in #{valid.inspect}"
116
- raise(Errors::UnknownHeaderColumnError, err_msg)
117
- end
118
-
119
117
  # Raises an error if header column is missing/empty
120
118
  # @param [Object] column the CSV header column
121
119
  # @raise [Errors::MissingHeaderColumnError]
@@ -1,6 +1,6 @@
1
1
  module HoneyFormat
2
2
  # Header column converter
3
- module ConvertHeaderValue
3
+ module HeaderColumnConverter
4
4
  # Replace map
5
5
  REPLACE_MAP = [
6
6
  [/ \(/, '('],
@@ -16,13 +16,16 @@ module HoneyFormat
16
16
  # Returns the converted value; the argument is duplicated first, so it is not mutated.
17
17
  # @return [Symbol] the cleaned header column.
18
18
  # @param [String] column the string to be cleaned.
19
- # @param [Integer] column index.
19
+ # @param [Integer] index the column index.
20
20
  # @example Convert simple header
21
- # ConvertHeaderValue.call(" User name ") #=> "user_name"
21
+ # HeaderColumnConverter.call(" User name ") #=> :user_name
22
22
  # @example Convert complex header
23
- # ConvertHeaderValue.call(" First name (user)") #=> :'first_name(user)'
24
- def self.call(column, index)
25
- return :"column#{index}" if column.nil? || column.empty?
23
+ # HeaderColumnConverter.call(" First name (user)") #=> :'first_name(user)'
24
+ def self.call(column, index = nil)
25
+ if column.nil? || column.empty?
26
+ raise(ArgumentError, "column and column index can't be blank/nil") unless index
27
+ return :"column#{index}"
28
+ end
26
29
 
27
30
  column = column.dup
28
31
  column.strip!
@@ -0,0 +1,90 @@
1
+ require 'csv'
2
+
3
+ require 'honey_format/rows'
4
+ require 'honey_format/header'
5
+
6
module HoneyFormat
  # Represents a CSV-like matrix: a header plus the rows that belong to it.
  class Matrix
    # Header built from the header row (or from the explicit header argument).
    # @return [Header] object representing the header.
    attr_reader :header

    # Rows built from every matrix row that is not the header row.
    # @return [Rows] the rows.
    attr_reader :rows

    # Instantiate a Matrix.
    # @return [Matrix] a new instance of Matrix.
    # @param [Array<Array<String, nil>>] matrix the raw rows; it is read but never modified.
    # @param [Array<String>] header optional argument that represents header, required if the matrix lacks a header row.
    # @param [#call] header_converter converts header columns.
    # @param [#call] row_builder will be called for each parsed row.
    # @param type_map [Hash] map of column_name => type conversion to perform.
    # @raise [HeaderError] super class of errors raised when there is a header error.
    # @raise [MissingHeaderError] raised when header is missing (empty or nil).
    # @raise [MissingHeaderColumnError] raised when header column is missing.
    # @raise [RowError] super class of errors raised when there is a row error.
    # @raise [EmptyRowColumnsError] raised when row columns are empty.
    # @raise [InvalidRowLengthError] raised when row has more columns than header columns.
    # @example
    #   matrix = HoneyFormat::Matrix.new([%w[name id]])
    # @example With custom header converter
    #   converter = proc { |v| v == 'name' ? 'first_name' : v }
    #   matrix = HoneyFormat::Matrix.new([%w[name id]], header_converter: converter)
    #   matrix.columns # => [:first_name, :id]
    # @example Handle errors
    #   begin
    #     matrix = HoneyFormat::Matrix.new([%w[name id]])
    #   rescue HoneyFormat::HeaderError => e
    #     puts "header error: #{e.class}, #{e.message}"
    #   rescue HoneyFormat::RowError => e
    #     puts "row error: #{e.class}, #{e.message}"
    #   end
    def initialize(
      matrix,
      header: nil,
      header_converter: HoneyFormat.header_converter,
      row_builder: nil,
      type_map: {}
    )
      # Without an explicit header the first matrix row is the header. It is
      # read with #first/#drop instead of #shift so the caller's array keeps
      # its header row after the Matrix has been built.
      header_row, body = header ? [header, matrix] : [matrix.first, matrix.drop(1)]

      @header = Header.new(header_row, converter: header_converter)
      @rows = Rows.new(body, columns, builder: row_builder, type_map: type_map)
    end

    # Columns converted from the original header
    # @return [Array<Symbol>] of column identifiers.
    def columns
      @header.to_a
    end

    # @yield [row] The given block will be passed for every row.
    # @yieldparam [Row] row in the matrix.
    # @return [Enumerator] If no block is given, an enumerator object will be returned.
    def each_row(&block)
      return rows.each unless block

      rows.each(&block)
    end

    # Convert matrix to CSV-string.
    # @param columns [Array<Symbol>, Set<Symbol>, NilClass] the columns to output, nil means all columns (default: nil)
    # @yield [row] The given block will be passed for every row - return truthy if you want the row to be included in the output
    # @yieldparam [Row] row
    # @return [String] CSV-string representation.
    # @example with selected columns
    #   matrix.to_csv(columns: [:id, :country])
    # @example with selected rows
    #   matrix.to_csv { |row| row.country == 'Sweden' }
    # @example with both selected columns and rows
    #   matrix.to_csv(columns: [:id, :country]) { |row| row.country == 'Sweden' }
    def to_csv(columns: nil, &block)
      # Normalise once so header and rows filter on the same symbols
      columns = columns&.map(&:to_sym)
      @header.to_csv(columns: columns) + @rows.to_csv(columns: columns, &block)
    end
  end
end
@@ -18,7 +18,7 @@ module HoneyFormat
18
18
  attributes = members
19
19
  attributes = columns & attributes if columns
20
20
 
21
- row = attributes.map { |column| to_csv_value(column) }
21
+ row = attributes.map! { |column| to_csv_value(column) }
22
22
 
23
23
  ::CSV.generate_line(row)
24
24
  end
@@ -6,24 +6,28 @@ module HoneyFormat
6
6
  # Returns a new instance of RowBuilder.
7
7
  # @return [RowBuilder] a new instance of RowBuilder.
8
8
  # @param [Array<Symbol>] columns an array of symbols.
9
- # @param builder [#call, #to_csv] optional row builder
9
+ # @param builder [#call, #to_csv] optional row builder.
10
+ # @param type_map [Hash] map of column_name => type conversion to perform.
10
11
  # @raise [RowError] super class of errors raised when there is a row error.
11
12
  # @raise [EmptyRowColumnsError] raised when there are no columns.
12
13
  # @raise [InvalidRowLengthError] raised when row has more columns than header columns.
13
14
  # @example Create new row
14
15
  # RowBuilder.new([:id])
15
- def initialize(columns, builder: nil)
16
+ def initialize(columns, builder: nil, type_map: {})
16
17
  if columns.empty?
17
18
  err_msg = 'Expected array with at least one element, but was empty.'
18
19
  raise(Errors::EmptyRowColumnsError, err_msg)
19
20
  end
20
21
 
22
+ @type_map = type_map
23
+ @converter = HoneyFormat.value_converter
24
+
21
25
  @row_klass = Row.new(*columns)
22
26
  @builder = builder
23
27
  @columns = columns
24
28
  end
25
29
 
26
- # Returns a Struct.
30
+ # Returns an object representing the row.
27
31
  # @return [Row, Object] a new instance of built row.
28
32
  # @param row [Array] the row array.
29
33
  # @raise [InvalidRowLengthError] raised when the row has more elements than there are columns
@@ -31,9 +35,7 @@ module HoneyFormat
31
35
  # r = RowBuilder.new([:id])
32
36
  # r.build(['1']).id #=> '1'
33
37
  def build(row)
34
- row = @row_klass.call(row)
35
- return row unless @builder
36
- @builder.call(row)
38
+ build_row!(row)
37
39
  rescue ArgumentError => e
38
40
  raise unless e.message == 'struct size differs'
39
41
  raise_invalid_row_length!(e, row)
@@ -41,6 +43,25 @@ module HoneyFormat
41
43
 
42
44
  private
43
45
 
46
# Builds the row object for one raw row and applies the configured type conversions.
# @return [Row, Object] the built row, or the custom builder's result when a builder is set.
# @param row [Array] the row array.
# @raise [ArgumentError] raised when struct fails to build
# @example Build new row (through the public entry point)
#   r = RowBuilder.new([:id])
#   r.build(['1']).id #=> '1'
def build_row!(row)
  built = @row_klass.call(row)

  # Swap every mapped column's raw value for its converted counterpart
  @type_map.each do |name, type|
    built[name] = @converter.call(built[name], type)
  end

  @builder ? @builder.call(built) : built
end
64
+
44
65
  # Raises invalid row length error
45
66
  # @param [StandardError] e the raised error
46
67
  # @param [Object] row
@@ -9,15 +9,22 @@ module HoneyFormat
9
9
  # Returns array of cleaned strings.
10
10
  # @return [Rows] new instance of Rows.
11
11
  # @param [Array] rows the array of rows.
12
- # @param [Array] columns the array of column symbols.
12
+ # @param [Array<Symbol>] columns the array of column symbols.
13
+ # @param type_map [Hash] map of column_name => type conversion to perform.
13
14
  # @raise [RowError] super class of errors raised when there is a row error.
14
15
  # @raise [EmptyRowColumnsError] raised when there are no columns.
15
16
  # @raise [InvalidRowLengthError] raised when row has more columns than header columns.
16
- def initialize(rows, columns, builder: nil)
17
- builder = RowBuilder.new(columns, builder: builder)
17
+ def initialize(rows, columns, builder: nil, type_map: {})
18
+ builder = RowBuilder.new(columns, builder: builder, type_map: type_map)
18
19
  @rows = prepare_rows(builder, rows)
19
20
  end
20
21
 
22
+ # Reports whether this collection holds no rows.
23
+ # @return [true, false] true when there are no rows, false otherwise.
24
+ def empty?
25
+ @rows.empty?
26
+ end
27
+
21
28
  # @yield [row] The given block will be passed for every row.
22
29
  # @yieldparam [Row] a row in the CSV file.
23
30
  # @return [Enumerator]
@@ -51,7 +58,7 @@ module HoneyFormat
51
58
  # csv.to_csv(columns: [:id, :country]) { |row| row.country == 'Sweden' }
52
59
  def to_csv(columns: nil, &block)
53
60
  # Convert columns to Set for performance
54
- columns = Set.new(columns) if columns
61
+ columns = Set.new(columns.map(&:to_sym)) if columns
55
62
  csv_rows = []
56
63
  each do |row|
57
64
  if !block || block.call(row)
@@ -66,7 +73,7 @@ module HoneyFormat
66
73
  def prepare_rows(builder, rows)
67
74
  built_rows = []
68
75
  rows.each do |row|
69
- # ignore empty rows - the Ruby CSV library can return empty lines as [nil]
76
+ # ignore empty rows
70
77
  next if row.nil? || row.empty? || row == [nil]
71
78
  built_rows << builder.build(row)
72
79
  end
@@ -0,0 +1,112 @@
1
+ require 'date'
2
+ require 'time'
3
+ require 'set'
4
+ require 'digest'
5
+
6
+ require 'honey_format/header_column_converter'
7
+
8
module HoneyFormat
  # Registry of named value converters. Each converter is an object that
  # responds to #call and receives the single value to convert.
  class ValueConverter
    # Strings the boolean converters map to true
    TRUTHY = Set.new(%w[t T 1 y Y true TRUE]).freeze
    # Strings the boolean converters map to false
    FALSY = Set.new(%w[f F 0 n N false FALSE]).freeze

    # Maps a value to true/false, or nil when it is in neither set.
    # The value is downcased before the lookup.
    CONVERT_BOOLEAN = lambda do |v|
      normalized = v&.downcase
      next true if TRUTHY.include?(normalized)
      next false if FALSY.include?(normalized)

      nil
    end

    # Default value converters
    # NOTE: Kernel#Integer is called without a base, so radix prefixes are
    # honoured (e.g. '0x1A' converts to 26).
    DEFAULT_CONVERTERS = {
      # strict variants - let conversion errors propagate
      decimal!: proc { |v| Float(v) },
      integer!: proc { |v| Integer(v) },
      date!: proc { |v| Date.parse(v) },
      datetime!: proc { |v| Time.parse(v) },
      symbol!: proc { |v|
        raise(ArgumentError, "can't convert nil to symbol") if v.nil?

        v.to_sym
      },
      downcase!: proc { |v|
        raise(ArgumentError, "can't convert nil to downcased string") if v.nil?

        v.downcase
      },
      upcase!: proc { |v|
        raise(ArgumentError, "can't convert nil to upcased string") if v.nil?

        v.upcase
      },
      boolean!: proc { |v|
        CONVERT_BOOLEAN.call(v).tap do |converted|
          raise(ArgumentError, "can't convert #{v} to boolean") if converted.nil?
        end
      },
      # safe variants - any StandardError during conversion yields nil
      decimal: proc { |v|
        begin
          Float(v)
        rescue StandardError
          nil
        end
      },
      integer: proc { |v|
        begin
          Integer(v)
        rescue StandardError
          nil
        end
      },
      date: proc { |v|
        begin
          Date.parse(v)
        rescue StandardError
          nil
        end
      },
      datetime: proc { |v|
        begin
          Time.parse(v)
        rescue StandardError
          nil
        end
      },
      symbol: proc { |v| v.to_sym unless v.nil? },
      downcase: proc { |v| v.downcase unless v.nil? },
      upcase: proc { |v| v.upcase unless v.nil? },
      boolean: proc { |v| CONVERT_BOOLEAN.call(v) },
      md5: proc { |v| v ? Digest::MD5.hexdigest(v) : nil },
      nil: proc {},
      header_column: HeaderColumnConverter,
    }.freeze

    # Instantiate a value converter, seeded with a copy of the defaults
    def initialize
      @converters = DEFAULT_CONVERTERS.dup
    end

    # Returns list of registered types
    # @return [Array<Symbol>] list of registered types
    def types
      @converters.keys
    end

    # Register a value converter
    # @param [Symbol, String] type the name of the type
    # @param [#call] converter that responds to #call
    # @return [ValueConverter] returns self
    # @raise [ValueTypeExistsError] if type is already registered
    def register(type, converter)
      tap { |registry| registry[type] = converter }
    end

    # Convert value
    # @param [Object] value to be converted
    # @param [Symbol, String] type the name of the type
    # @return [Object] whatever the converter registered for type returns
    # @raise [UnknownValueTypeError] if type does not exist
    def call(value, type)
      converter = self[type]
      converter.call(value)
    end

    # Register a value converter
    # @param [Symbol, String] type the name of the type
    # @param [#call] converter that responds to #call
    # @return [Object] returns the converter
    # @raise [ValueTypeExistsError] if type is already registered
    def []=(type, converter)
      key = type.to_sym
      raise(Errors::ValueTypeExistsError, "type '#{key}' already exists") if type?(key)

      @converters[key] = converter
    end

    # @param [Symbol, String] type the name of the type
    # @return [Object] returns the converter
    # @raise [UnknownValueTypeError] if type does not exist
    def [](type)
      key = type.to_sym
      return @converters[key] if @converters.key?(key)

      raise(Errors::UnknownValueTypeError, "unknown type '#{type}'")
    end

    # @param [Symbol, String] type the name of the type
    # @return [true, false] true if type exists, false otherwise
    def type?(type)
      key = type.to_sym
      @converters.key?(key)
    end
  end
end