honey_format 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.hound.yml +3 -0
  3. data/.rubocop.yml +7 -0
  4. data/.ruby-style-guide.yml +264 -0
  5. data/CHANGELOG.md +15 -0
  6. data/Gemfile +2 -0
  7. data/README.md +63 -15
  8. data/Rakefile +2 -0
  9. data/bin/benchmark +2 -0
  10. data/bin/console +1 -0
  11. data/exe/honey_format +1 -0
  12. data/honey_format.gemspec +5 -4
  13. data/lib/honey_format/cli/benchmark_cli.rb +15 -13
  14. data/lib/honey_format/cli/cli.rb +17 -12
  15. data/lib/honey_format/cli/result_writer.rb +2 -0
  16. data/lib/honey_format/configuration.rb +114 -11
  17. data/lib/honey_format/converters/convert_boolean.rb +24 -0
  18. data/lib/honey_format/converters/convert_date_and_time.rb +30 -0
  19. data/lib/honey_format/converters/convert_number.rb +33 -0
  20. data/lib/honey_format/converters/convert_string.rb +42 -0
  21. data/lib/honey_format/converters/converters.rb +12 -0
  22. data/lib/honey_format/converters/header_column_converter.rb +57 -0
  23. data/lib/honey_format/csv.rb +22 -14
  24. data/lib/honey_format/errors.rb +8 -2
  25. data/lib/honey_format/helpers/helpers.rb +41 -0
  26. data/lib/honey_format/{header.rb → matrix/header.rb} +48 -12
  27. data/lib/honey_format/matrix/matrix.rb +104 -0
  28. data/lib/honey_format/{row.rb → matrix/row.rb} +6 -3
  29. data/lib/honey_format/{row_builder.rb → matrix/row_builder.rb} +5 -4
  30. data/lib/honey_format/{rows.rb → matrix/rows.rb} +7 -4
  31. data/lib/honey_format/matrix.rb +6 -89
  32. data/lib/honey_format/registry.rb +99 -0
  33. data/lib/honey_format/version.rb +4 -2
  34. data/lib/honey_format.rb +14 -6
  35. metadata +34 -24
  36. data/lib/honey_format/header_column_converter.rb +0 -40
  37. data/lib/honey_format/value_converter.rb +0 -117
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'optparse'
2
4
 
3
5
  module HoneyFormat
@@ -15,8 +17,11 @@ module HoneyFormat
15
17
 
16
18
  private
17
19
 
18
- def puts(*args)
19
- @io.puts(*args)
20
+ # Puts to configured IO
21
+ # @param [#to_s] object to print
22
+ # @return [nil] nil
23
+ def puts(arg)
24
+ @io.puts(arg)
20
25
  end
21
26
 
22
27
  # Parse command line arguments and return options
@@ -33,26 +38,26 @@ module HoneyFormat
33
38
  type_map = {}
34
39
 
35
40
  OptionParser.new do |parser|
36
- parser.banner = "Usage: honey_format [options] <file.csv>"
41
+ parser.banner = 'Usage: honey_format [options] <file.csv>'
37
42
  parser.default_argv = argv
38
43
 
39
- parser.on("--csv=input.csv", String, "CSV file") do |value|
44
+ parser.on('--csv=input.csv', String, 'CSV file') do |value|
40
45
  input_path = value
41
46
  end
42
47
 
43
- parser.on("--columns=id,name", Array, "Select columns") do |value|
48
+ parser.on('--columns=id,name', Array, 'Select columns') do |value|
44
49
  columns = value&.map(&:to_sym)
45
50
  end
46
51
 
47
- parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
52
+ parser.on('--output=output.csv', String, 'CSV output (STDOUT otherwise)') do |value|
48
53
  output_path = value
49
54
  end
50
55
 
51
- parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
56
+ parser.on('--delimiter=,', String, 'CSV delimiter (default: ,)') do |value|
52
57
  delimiter = value
53
58
  end
54
59
 
55
- parser.on("--skip-lines=,", String, "Skip lines that match this pattern") do |value|
60
+ parser.on('--skip-lines=,', String, 'Skip lines that match this pattern') do |value|
56
61
  skip_lines = value
57
62
  end
58
63
 
@@ -60,15 +65,15 @@ module HoneyFormat
60
65
  type_map = option_to_h(value || [])
61
66
  end
62
67
 
63
- parser.on("--[no-]header-only", "Print only the header") do |value|
68
+ parser.on('--[no-]header-only', 'Print only the header') do |value|
64
69
  header_only = value
65
70
  end
66
71
 
67
- parser.on("--[no-]rows-only", "Print only the rows") do |value|
72
+ parser.on('--[no-]rows-only', 'Print only the rows') do |value|
68
73
  rows_only = value
69
74
  end
70
75
 
71
- parser.on("-h", "--help", "How to use") do
76
+ parser.on('-h', '--help', 'How to use') do
72
77
  puts parser
73
78
  exit
74
79
  end
@@ -96,7 +101,7 @@ module HoneyFormat
96
101
  header_only: header_only,
97
102
  rows_only: rows_only,
98
103
  skip_lines: skip_lines,
99
- type_map: type_map
104
+ type_map: type_map,
100
105
  }
101
106
  end
102
107
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module HoneyFormat
2
4
  # CLI result writer handles command output
3
5
  # @attr_reader [true, false] verbose the writer mode
@@ -1,26 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'honey_format/helpers/helpers'
4
+
1
5
  module HoneyFormat
2
6
  # Holds HoneyFormat configuration
3
- # @attr_reader [#call] header_converter the configured header converter
4
- # @attr_reader [#call] converter the configured value converter
5
- # @attr_writer [#call] header_converter to use
6
- # @attr_writer [#call] converter the value converter to use
7
+ # @attr_accessor [String] delimiter the default column delimiter (default: ,)
8
+ # @attr_accessor [String, Symbol] row_delimiter the default row delimiter (default: :auto)
9
+ # @attr_accessor [String] quote_character the default quote character (default: ")
10
+ # @attr_accessor [String, Regexp] skip_lines skip all lines matching pattern (default: nil)
7
11
  class Configuration
8
- attr_accessor :header_converter, :converter
12
+ attr_accessor :delimiter, :row_delimiter, :quote_character, :skip_lines
9
13
 
10
14
  # Instantiate configuration
11
15
  def initialize
12
- @converter = ValueConverter.new
13
- @header_converter = @converter[:header_column]
16
+ @converter_registry = nil
17
+ @header_converter = nil
18
+ @header_deduplicator = nil
19
+ @delimiter = ','
20
+ @row_delimiter = :auto
21
+ @quote_character = '"'
22
+ @skip_lines = nil
23
+ end
24
+
25
+ # Returns the header converter
26
+ # @return [#call] header_converter the configured header converter
27
+ def header_converter
28
+ @header_converter ||= converter_registry[:header_column]
14
29
  end
15
30
 
16
31
  # Set the header converter
17
- # @param [Symbol, #call] converter for registered value converter or object that responds to #call
32
+ # @param [Symbol, #call] converter for registered converter registry or object that
33
+ # responds to #call
18
34
  # @return [#call] the header converter
19
35
  def header_converter=(converter)
20
- if converter.is_a?(Symbol)
21
- return @header_converter = @converter[converter]
36
+ @header_converter = if converter.is_a?(Symbol)
37
+ converter_registry[converter]
38
+ else
39
+ converter
40
+ end
41
+ end
42
+
43
+ # Return the deduplication header strategy
44
+ # @return [#call] the header deduplication strategy
45
+ def header_deduplicator
46
+ @header_deduplicator ||= header_deduplicator_registry[:deduplicate]
47
+ end
48
+
49
+ # Set the deduplication header strategy
50
+ # @param [Symbol, #call]
51
+ # symbol with known strategy identifier or method that responds
52
+ # to #call(columns, key_count)
53
+ # @return [#call] the header deduplication strategy
54
+ # @raise [UnknownDeduplicationStrategyError]
55
+ def header_deduplicator=(strategy)
56
+ if header_deduplicator_registry.type?(strategy)
57
+ @header_deduplicator = header_deduplicator_registry[strategy]
58
+ elsif strategy.respond_to?(:call)
59
+ @header_deduplicator = strategy
60
+ else
61
+ message = "unknown deduplication strategy: '#{strategy}'"
62
+ raise(Errors::UnknownDeduplicationStrategyError, message)
22
63
  end
23
- @header_converter = converter
64
+ end
65
+
66
+ # Default header deduplicate strategies
67
+ # @return [Hash] the default header deduplication strategies
68
+ def default_header_deduplicators
69
+ @default_header_deduplicators ||= {
70
+ deduplicate: proc do |columns|
71
+ Helpers.key_count_to_deduplicated_array(columns)
72
+ end,
73
+ raise: proc do |columns|
74
+ duplicates = Helpers.duplicated_items(columns)
75
+ if duplicates.any?
76
+ message = "all columns must be unique, duplicates are: #{duplicates}"
77
+ raise(Errors::DuplicateHeaderColumnError, message)
78
+ end
79
+ columns
80
+ end,
81
+ none: proc { |columns| columns },
82
+ }.freeze
83
+ end
84
+
85
+ # Returns the column deduplication registry
86
+ # @return [#call] column deduplication registry
87
+ def header_deduplicator_registry
88
+ @header_deduplicator_registry ||= Registry.new(default_header_deduplicators)
89
+ end
90
+
91
+ # Returns the converter registry
92
+ # @return [#call] converter the configured converter registry
93
+ def converter_registry
94
+ @converter_registry ||= Registry.new(default_converters)
95
+ end
96
+
97
+ # Default converter registry
98
+ # @return [Hash] hash with default converters
99
+ def default_converters
100
+ @default_converters ||= {
101
+ # strict variants
102
+ decimal!: StrictConvertDecimal,
103
+ integer!: StrictConvertInteger,
104
+ date!: StrictConvertDate,
105
+ datetime!: StrictConvertDatetime,
106
+ symbol!: StrictConvertSymbol,
107
+ downcase!: StrictConvertDowncase,
108
+ upcase!: StrictConvertUpcase,
109
+ boolean!: StrictConvertBoolean,
110
+ # safe variants
111
+ decimal: ConvertDecimal,
112
+ decimal_or_zero: ConvertDecimalOrZero,
113
+ integer: ConvertInteger,
114
+ integer_or_zero: ConvertIntegerOrZero,
115
+ date: ConvertDate,
116
+ datetime: ConvertDatetime,
117
+ symbol: ConvertSymbol,
118
+ downcase: ConvertDowncase,
119
+ upcase: ConvertUpcase,
120
+ boolean: ConvertBoolean,
121
+ md5: ConvertMD5,
122
+ hex: ConvertHex,
123
+ nil: ConvertNil,
124
+ blank: ConvertBlank,
125
+ header_column: ConvertHeaderColumn,
126
+ }.freeze
24
127
  end
25
128
  end
26
129
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ # String values considered truthy
5
+ TRUTHY = Set.new(%w[t T 1 y Y true TRUE]).freeze
6
+ # String values considered falsy
7
+ FALSY = Set.new(%w[f F 0 n N false FALSE]).freeze
8
+
9
+ # Tries to convert value to boolean, returns nil if it can't convert
10
+ ConvertBoolean = proc do |v|
11
+ if TRUTHY.include?(v)
12
+ true
13
+ elsif FALSY.include?(v)
14
+ false
15
+ end
16
+ end
17
+
18
+ # Convert to boolean or raise error
19
+ StrictConvertBoolean = proc do |v|
20
+ ConvertBoolean.call(v).tap do |value|
21
+ raise(ArgumentError, "can't convert #{v} to boolean") if value.nil?
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'time'
5
+
6
+ module HoneyFormat
7
+ # Convert to date
8
+ ConvertDate = proc do |v|
9
+ begin
10
+ StrictConvertDate.call(v)
11
+ rescue ArgumentError, TypeError
12
+ nil
13
+ end
14
+ end
15
+
16
+ # Convert to datetime
17
+ ConvertDatetime = proc do |v|
18
+ begin
19
+ StrictConvertDatetime.call(v)
20
+ rescue ArgumentError, TypeError
21
+ nil
22
+ end
23
+ end
24
+
25
+ # Convert to date or raise error
26
+ StrictConvertDate = proc { |v| Date.parse(v) }
27
+
28
+ # Convert to datetime or raise error
29
+ StrictConvertDatetime = proc { |v| Time.parse(v) }
30
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ # Converts decimal or nil
5
+ ConvertDecimal = proc do |v|
6
+ begin
7
+ Float(v)
8
+ rescue ArgumentError, TypeError
9
+ nil
10
+ end
11
+ end
12
+
13
+ # Converts to decimal or zero
14
+ ConvertDecimalOrZero = proc { |v| v.to_f }
15
+
16
+ # Convert to integer or nil
17
+ ConvertInteger = proc do |v|
18
+ begin
19
+ Integer(v)
20
+ rescue ArgumentError, TypeError
21
+ nil
22
+ end
23
+ end
24
+
25
+ # Convert to integer or zero
26
+ ConvertIntegerOrZero = proc { |v| v.to_i }
27
+
28
+ # Convert to decimal or raise error
29
+ StrictConvertDecimal = proc { |v| Float(v) }
30
+
31
+ # Convert to integer or raise error
32
+ StrictConvertInteger = proc { |v| Integer(v) }
33
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'securerandom'
5
+
6
+ module HoneyFormat
7
+ # Convert to downcase or nil
8
+ ConvertDowncase = proc { |v| v&.downcase }
9
+
10
+ # Convert to upcase or nil
11
+ ConvertUpcase = proc { |v| v&.upcase }
12
+
13
+ # Convert to symbol or nil
14
+ ConvertSymbol = proc { |v| v&.to_sym }
15
+
16
+ # Convert to md5 or nil
17
+ ConvertMD5 = proc { |v| Digest::MD5.hexdigest(v) if v }
18
+
19
+ # Convert to hex or nil
20
+ ConvertHex = proc { |v| SecureRandom.hex if v }
21
+
22
+ # Convert to blank string
23
+ ConvertBlank = proc { '' }
24
+
25
+ # Convert header column
26
+ ConvertHeaderColumn = HeaderColumnConverter
27
+
28
+ # Convert to upcase or raise error
29
+ StrictConvertUpcase = proc do |v|
30
+ ConvertUpcase.call(v) || raise(ArgumentError, "can't convert nil to upcased string")
31
+ end
32
+
33
+ # Convert to downcase or raise error
34
+ StrictConvertDowncase = proc do |v|
35
+ ConvertDowncase.call(v) || raise(ArgumentError, "can't convert nil to downcased string")
36
+ end
37
+
38
+ # Convert to symbol or raise error
39
+ StrictConvertSymbol = proc do |v|
40
+ ConvertSymbol.call(v) || raise(ArgumentError, "can't convert nil to symbol")
41
+ end
42
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'honey_format/converters/header_column_converter'
4
+ require 'honey_format/converters/convert_boolean'
5
+ require 'honey_format/converters/convert_date_and_time'
6
+ require 'honey_format/converters/convert_number'
7
+ require 'honey_format/converters/convert_string'
8
+
9
+ module HoneyFormat
10
+ # Convert to nil
11
+ ConvertNil = proc {}
12
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ # Header column converter
5
+ module HeaderColumnConverter
6
+ # Bracket character matcher
7
+ BRACKETS = /\(|\[|\{|\)|\]|\}/
8
+
9
+ # Separator characters
10
+ SEPS = /'|"|\||\*|\^|\&|%|\$|€|#/
11
+
12
+ # Replace map
13
+ REPLACE_MAP = [
14
+ [/\\/, '/'], # replace "\" with "/"
15
+ [/ \(/, '('], # replace " (" with "("
16
+ [/ \[/, '['], # replace " [" with "["
17
+ [/ \{/, '{'], # replace " {" with "{"
18
+ [/ \{/, '{'], # replace " {" with "{"
19
+ [/\) /, ')'], # replace ") " with ")"
20
+ [/\] /, ']'], # replace "] " with "]"
21
+ [/\} /, '}'], # replace "} " with "}"
22
+ [BRACKETS, '_'], # replace (, [, {, ), ] and } with "_"
23
+ [/ +/, '_'], # replace one or more spaces with "_"
24
+ [/-/, '_'], # replace "-" with "_"
25
+ [/::/, '_'], # replace "::" with "_"
26
+ [%r{/}, '_'], # replace "/" with "_"
27
+ [SEPS, '_'], # replace separator chars with "_"
28
+ [/_+/, '_'], # replace one or more "_" with single "_"
29
+ [/\A_+/, ''], # remove leading "_"
30
+ [/_+\z/, ''], # remove trailing "_"
31
+ ].map(&:freeze).freeze
32
+
33
+ # Returns converted value; works on a copy, so the argument is not mutated.
34
+ # @return [Symbol] the cleaned header column.
35
+ # @param [String] column the string to be cleaned.
36
+ # @param [Integer] index the column index.
37
+ # @example Convert simple header
38
+ # HeaderColumnConverter.call(" User name ") #=> :user_name
39
+ # @example Convert complex header
40
+ # HeaderColumnConverter.call(" First name (user)") #=> :first_name_user
41
+ def self.call(column, index = nil)
42
+ if column.nil? || column.empty?
43
+ raise(ArgumentError, "column and column index can't be blank/nil") unless index
44
+ return :"column#{index}"
45
+ end
46
+
47
+ column = column.dup
48
+ column.strip!
49
+ column.downcase!
50
+ REPLACE_MAP.each do |data|
51
+ from, to = data
52
+ column.gsub!(from, to)
53
+ end
54
+ column.to_sym
55
+ end
56
+ end
57
+ end
@@ -1,8 +1,7 @@
1
- require 'csv'
1
+ # frozen_string_literal: true
2
2
 
3
+ require 'csv'
3
4
  require 'honey_format/matrix'
4
- # require 'honey_format/rows'
5
- # require 'honey_format/header'
6
5
 
7
6
  module HoneyFormat
8
7
  # Represents CSV.
@@ -10,14 +9,18 @@ module HoneyFormat
10
9
  # Instantiate CSV.
11
10
  # @return [CSV] a new instance of CSV.
12
11
  # @param [String] csv the CSV string
13
- # @param [String] delimiter the CSV column delimiter
14
- # @param [String, Symbol] row_delimiter the CSV row delimiter (default: :auto)
15
- # @param [String] quote_character the CSV quote character (default: ")
16
- # @param [Array<String>] header optional argument that represents CSV header, required if the CSV file lacks a header row.
17
- # @param [#call] header_converter converts header columns.
18
- # @param [#call] row_builder will be called for each parsed row.
12
+ # @param delimiter [String] the CSV column delimiter
13
+ # @param row_delimiter [String, Symbol] the CSV row delimiter (default: :auto)
14
+ # @param quote_character [String] the CSV quote character (default: ")
15
+ # @param header [Array<String>]
16
+ # header optional argument that represents CSV header, required if the CSV file
17
+ # lacks a header row.
18
+ # @param header_converter [#call] converts header columns.
19
+ # @param header_deduplicator [#call] deduplicates header columns.
20
+ # @param row_builder [#call] will be called for each parsed row.
19
21
  # @param type_map [Hash] map of column_name => type conversion to perform.
20
- # @param skip_lines [Regexp, String] Regexp for determining wheter a line is a comment. See CSV skip_lines option.
22
+ # @param skip_lines [Regexp, String]
23
+ # Regexp for determining whether a line is a comment. See CSV skip_lines option.
21
24
  # @raise [HeaderError] super class of errors raised when there is a CSV header error.
22
25
  # @raise [MissingHeaderError] raised when header is missing (empty or nil).
23
26
  # @raise [MissingHeaderColumnError] raised when header column is missing.
@@ -40,17 +43,21 @@ module HoneyFormat
40
43
  # rescue HoneyFormat::RowError => e
41
44
  # puts "row error: #{e.class}, #{e.message}"
42
45
  # end
46
+ # @example Skip all lines starting with '#'
47
+ # csv = HoneyFormat::CSV.new("name,id\n# some comment\njacob,1", skip_lines: '#')
48
+ # csv.rows.length # => 1
43
49
  # @see Matrix#new
44
50
  def initialize(
45
51
  csv,
46
- delimiter: ',',
47
- row_delimiter: :auto,
48
- quote_character: '"',
52
+ delimiter: HoneyFormat.config.delimiter,
53
+ row_delimiter: HoneyFormat.config.row_delimiter,
54
+ quote_character: HoneyFormat.config.quote_character,
49
55
  header: nil,
50
56
  header_converter: HoneyFormat.header_converter,
57
+ header_deduplicator: HoneyFormat.config.header_deduplicator,
51
58
  row_builder: nil,
52
59
  type_map: {},
53
- skip_lines: nil
60
+ skip_lines: HoneyFormat.config.skip_lines
54
61
  )
55
62
  csv = ::CSV.parse(
56
63
  csv,
@@ -64,6 +71,7 @@ module HoneyFormat
64
71
  csv,
65
72
  header: header,
66
73
  header_converter: header_converter,
74
+ header_deduplicator: header_deduplicator,
67
75
  row_builder: row_builder,
68
76
  type_map: type_map
69
77
  )
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module HoneyFormat
2
4
  # Errors
3
5
  module Errors
@@ -8,6 +10,10 @@ module HoneyFormat
8
10
  class MissingHeaderError < HeaderError; end
9
11
  # Raised when header column is missing
10
12
  class MissingHeaderColumnError < HeaderError; end
13
+ # Raised when header column duplicate is found
14
+ class DuplicateHeaderColumnError < HeaderError; end
15
+ # Raised when deduplication strategy is unknown
16
+ class UnknownDeduplicationStrategyError < HeaderError; end
11
17
 
12
18
  # Row errors
13
19
  # Super class of errors raised when there is a row error
@@ -19,9 +25,9 @@ module HoneyFormat
19
25
 
20
26
  # Value conversion errors
21
27
  # Raised when value type is unknown
22
- class UnknownValueTypeError < ArgumentError; end
28
+ class UnknownTypeError < ArgumentError; end
23
29
  # Raised when value type already exists
24
- class ValueTypeExistsError < ArgumentError; end
30
+ class TypeExistsError < ArgumentError; end
25
31
  end
26
32
 
27
33
  include Errors
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ module Helpers
5
+ # Converts an array of keys to a deduplicated array; repeated keys get an index suffix.
6
+ # @param [Array<Symbol>] data the keys, possibly containing duplicates
7
+ # @return [Array<Symbol>] an array of symbols
8
+ # @example
9
+ # Helpers.key_count_to_deduplicated_array([:a, :a, :b])
10
+ # # => [:a, :a1, :b]
11
+ def self.key_count_to_deduplicated_array(data)
12
+ array = []
13
+ count_occurences(data).each do |key, value|
14
+ next array << key if value == 1
15
+
16
+ values = Array.new(value) { |i| i }.map do |index|
17
+ next key if index.zero?
18
+ :"#{key}#{index}"
19
+ end
20
+ array.concat(values)
21
+ end
22
+ array
23
+ end
24
+
25
+ # Returns hash with key => occurrences_count
26
+ # @param [Array<Object>] array the array to count occurrences in
27
+ # @return [Hash] key => occurrences_count
28
+ def self.count_occurences(array)
29
+ occurrences = Hash.new(0)
30
+ array.each { |column| occurrences[column] += 1 }
31
+ occurrences
32
+ end
33
+
34
+ # Returns array with duplicated objects
35
+ # @param [Array<Object>] array the array to find duplicates in
36
+ # @return [Array<Object>] array of duplicated objects
37
+ def self.duplicated_items(array)
38
+ array.select { |col| array.count(col) > 1 }.uniq
39
+ end
40
+ end
41
+ end
@@ -1,4 +1,6 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+
3
+ require 'honey_format/helpers/helpers'
2
4
 
3
5
  module HoneyFormat
4
6
  # Represents a header
@@ -8,28 +10,36 @@ module HoneyFormat
8
10
  # Instantiate a Header
9
11
  # @return [Header] a new instance of Header.
10
12
  # @param [Array<String>] header array of strings.
11
- # @param converter [#call, Symbol] header converter that implements a #call method that takes one column (string) argument OR symbol for a registered value converter.
13
+ # @param converter [#call, Symbol]
14
+ # header converter that implements a #call method
15
+ # that takes one column (string) argument OR symbol for a registered
16
+ # converter registry.
17
+ # @param deduplicator [#call, Symbol]
18
+ # header deduplicator that implements a #call method
19
+ # that takes columns Array<String> argument OR symbol for a registered
20
+ # deduplicator registry.
12
21
  # @raise [HeaderError] super class of errors raised when there is a CSV header error.
13
22
  # @raise [MissingHeaderColumnError] raised when header is missing
14
23
  # @example Instantiate a header with a custom converter
15
24
  # converter = ->(col) { col == 'username' ? 'handle' : col }
16
25
  # header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
17
26
  # header.to_a # => ['name', 'handle']
18
- def initialize(header, converter: HoneyFormat.header_converter)
27
+ def initialize(
28
+ header,
29
+ converter: HoneyFormat.header_converter,
30
+ deduplicator: HoneyFormat.config.header_deduplicator
31
+ )
19
32
  if header.nil? || header.empty?
20
33
  raise(Errors::MissingHeaderError, "CSV header can't be empty.")
21
34
  end
22
35
 
23
36
  @original_header = header
24
- @converter = if converter.is_a?(Symbol)
25
- HoneyFormat.value_converter[converter]
26
- else
27
- converter
28
- end
29
-
37
+ self.deduplicator = deduplicator
38
+ self.converter = converter
30
39
  @columns = build_columns(@original_header)
31
40
  end
32
41
 
42
+ # Returns the original header
33
43
  # @return [Array<String>] the original header
34
44
  def original
35
45
  @original_header
@@ -82,15 +92,41 @@ module HoneyFormat
82
92
 
83
93
  private
84
94
 
95
+ # Set the header converter
96
+ # @param [Symbol, #call] symbol to known converter or object that responds to #call
97
+ # @return [nil]
98
+ def converter=(object)
99
+ if object.is_a?(Symbol)
100
+ @converter = HoneyFormat.converter_registry[object]
101
+ return
102
+ end
103
+
104
+ @converter = object
105
+ end
106
+
107
+ # Set the header deduplicator
108
+ # @param [Symbol, #call] symbol to known deduplicator or object that responds to #call
109
+ # @return [nil]
110
+ def deduplicator=(object)
111
+ if object.is_a?(Symbol)
112
+ @deduplicator = HoneyFormat.header_deduplicator_registry[object]
113
+ return
114
+ end
115
+
116
+ @deduplicator = object
117
+ end
118
+
85
119
  # Convert original header
86
120
  # @param [Array<String>] header the original header
87
121
  # @return [Array<String>] converted columns
88
122
  def build_columns(header)
89
- header.each_with_index.map do |header_column, index|
123
+ columns = header.each_with_index.map do |header_column, index|
90
124
  convert_column(header_column, index).tap do |column|
91
125
  maybe_raise_missing_column!(column)
92
126
  end
93
127
  end
128
+
129
+ @deduplicator.call(columns)
94
130
  end
95
131
 
96
132
  # Convert the column value
@@ -122,8 +158,8 @@ module HoneyFormat
122
158
 
123
159
  parts = [
124
160
  "CSV header column can't be nil or empty!",
125
- "When you pass your own converter make sure that it never returns nil or an empty string.",
126
- 'Instead generate unique columns names.'
161
+ 'When you pass your own converter make sure that it never returns nil or an empty string.', # rubocop:disable Metrics/LineLength
162
+ 'Instead generate unique columns names.',
127
163
  ]
128
164
  raise(Errors::MissingHeaderColumnError, parts.join(' '))
129
165
  end