honey_format 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.hound.yml +3 -0
  3. data/.rubocop.yml +7 -0
  4. data/.ruby-style-guide.yml +264 -0
  5. data/CHANGELOG.md +15 -0
  6. data/Gemfile +2 -0
  7. data/README.md +63 -15
  8. data/Rakefile +2 -0
  9. data/bin/benchmark +2 -0
  10. data/bin/console +1 -0
  11. data/exe/honey_format +1 -0
  12. data/honey_format.gemspec +5 -4
  13. data/lib/honey_format/cli/benchmark_cli.rb +15 -13
  14. data/lib/honey_format/cli/cli.rb +17 -12
  15. data/lib/honey_format/cli/result_writer.rb +2 -0
  16. data/lib/honey_format/configuration.rb +114 -11
  17. data/lib/honey_format/converters/convert_boolean.rb +24 -0
  18. data/lib/honey_format/converters/convert_date_and_time.rb +30 -0
  19. data/lib/honey_format/converters/convert_number.rb +33 -0
  20. data/lib/honey_format/converters/convert_string.rb +42 -0
  21. data/lib/honey_format/converters/converters.rb +12 -0
  22. data/lib/honey_format/converters/header_column_converter.rb +57 -0
  23. data/lib/honey_format/csv.rb +22 -14
  24. data/lib/honey_format/errors.rb +8 -2
  25. data/lib/honey_format/helpers/helpers.rb +41 -0
  26. data/lib/honey_format/{header.rb → matrix/header.rb} +48 -12
  27. data/lib/honey_format/matrix/matrix.rb +104 -0
  28. data/lib/honey_format/{row.rb → matrix/row.rb} +6 -3
  29. data/lib/honey_format/{row_builder.rb → matrix/row_builder.rb} +5 -4
  30. data/lib/honey_format/{rows.rb → matrix/rows.rb} +7 -4
  31. data/lib/honey_format/matrix.rb +6 -89
  32. data/lib/honey_format/registry.rb +99 -0
  33. data/lib/honey_format/version.rb +4 -2
  34. data/lib/honey_format.rb +14 -6
  35. metadata +34 -24
  36. data/lib/honey_format/header_column_converter.rb +0 -40
  37. data/lib/honey_format/value_converter.rb +0 -117
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'optparse'
2
4
 
3
5
  module HoneyFormat
@@ -15,8 +17,11 @@ module HoneyFormat
15
17
 
16
18
  private
17
19
 
18
- def puts(*args)
19
- @io.puts(*args)
20
+ # Puts to configured IO
21
+ # @param [#to_s] object to print
22
+ # @return [nil] nil
23
+ def puts(arg)
24
+ @io.puts(arg)
20
25
  end
21
26
 
22
27
  # Parse command line arguments and return options
@@ -33,26 +38,26 @@ module HoneyFormat
33
38
  type_map = {}
34
39
 
35
40
  OptionParser.new do |parser|
36
- parser.banner = "Usage: honey_format [options] <file.csv>"
41
+ parser.banner = 'Usage: honey_format [options] <file.csv>'
37
42
  parser.default_argv = argv
38
43
 
39
- parser.on("--csv=input.csv", String, "CSV file") do |value|
44
+ parser.on('--csv=input.csv', String, 'CSV file') do |value|
40
45
  input_path = value
41
46
  end
42
47
 
43
- parser.on("--columns=id,name", Array, "Select columns") do |value|
48
+ parser.on('--columns=id,name', Array, 'Select columns') do |value|
44
49
  columns = value&.map(&:to_sym)
45
50
  end
46
51
 
47
- parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
52
+ parser.on('--output=output.csv', String, 'CSV output (STDOUT otherwise)') do |value|
48
53
  output_path = value
49
54
  end
50
55
 
51
- parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
56
+ parser.on('--delimiter=,', String, 'CSV delimiter (default: ,)') do |value|
52
57
  delimiter = value
53
58
  end
54
59
 
55
- parser.on("--skip-lines=,", String, "Skip lines that match this pattern") do |value|
60
+ parser.on('--skip-lines=,', String, 'Skip lines that match this pattern') do |value|
56
61
  skip_lines = value
57
62
  end
58
63
 
@@ -60,15 +65,15 @@ module HoneyFormat
60
65
  type_map = option_to_h(value || [])
61
66
  end
62
67
 
63
- parser.on("--[no-]header-only", "Print only the header") do |value|
68
+ parser.on('--[no-]header-only', 'Print only the header') do |value|
64
69
  header_only = value
65
70
  end
66
71
 
67
- parser.on("--[no-]rows-only", "Print only the rows") do |value|
72
+ parser.on('--[no-]rows-only', 'Print only the rows') do |value|
68
73
  rows_only = value
69
74
  end
70
75
 
71
- parser.on("-h", "--help", "How to use") do
76
+ parser.on('-h', '--help', 'How to use') do
72
77
  puts parser
73
78
  exit
74
79
  end
@@ -96,7 +101,7 @@ module HoneyFormat
96
101
  header_only: header_only,
97
102
  rows_only: rows_only,
98
103
  skip_lines: skip_lines,
99
- type_map: type_map
104
+ type_map: type_map,
100
105
  }
101
106
  end
102
107
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module HoneyFormat
2
4
  # CLI result writer handles command output
3
5
  # @attr_reader [true, false] verbose the writer mode
@@ -1,26 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'honey_format/helpers/helpers'
4
+
1
5
  module HoneyFormat
2
6
  # Holds HoneyFormat configuration
3
- # @attr_reader [#call] header_converter the configured header converter
4
- # @attr_reader [#call] converter the configured value converter
5
- # @attr_writer [#call] header_converter to use
6
- # @attr_writer [#call] converter the value converter to use
7
+ # @attr_accessor [String] delimiter the default column delimiter (default: ,)
8
+ # @attr_accessor [String, Symbol] row_delimiter the default row delimiter (default: :auto)
9
+ # @attr_accessor [String] quote_character the default quote character (default: ")
10
+ # @attr_accessor [String, Regexp] skip_lines skip all lines matching pattern (default: nil)
7
11
  class Configuration
8
- attr_accessor :header_converter, :converter
12
+ attr_accessor :delimiter, :row_delimiter, :quote_character, :skip_lines
9
13
 
10
14
  # Instantiate configuration
11
15
  def initialize
12
- @converter = ValueConverter.new
13
- @header_converter = @converter[:header_column]
16
+ @converter_registry = nil
17
+ @header_converter = nil
18
+ @header_deduplicator = nil
19
+ @delimiter = ','
20
+ @row_delimiter = :auto
21
+ @quote_character = '"'
22
+ @skip_lines = nil
23
+ end
24
+
25
+ # Returns the header converter
26
+ # @return [#call] header_converter the configured header converter
27
+ def header_converter
28
+ @header_converter ||= converter_registry[:header_column]
14
29
  end
15
30
 
16
31
  # Set the header converter
17
- # @param [Symbol, #call] converter for registered value converter or object that responds to #call
32
+ # @param [Symbol, #call] converter for registered converter registry or object that
33
+ # responds to #call
18
34
  # @return [#call] the header converter
19
35
  def header_converter=(converter)
20
- if converter.is_a?(Symbol)
21
- return @header_converter = @converter[converter]
36
+ @header_converter = if converter.is_a?(Symbol)
37
+ converter_registry[converter]
38
+ else
39
+ converter
40
+ end
41
+ end
42
+
43
+ # Return the deduplication header strategy
44
+ # @return [#call] the header deduplication strategy
45
+ def header_deduplicator
46
+ @header_deduplicator ||= header_deduplicator_registry[:deduplicate]
47
+ end
48
+
49
+ # Set the deduplication header strategy
50
+ # @param [Symbol, #call]
51
+ # symbol with known strategy identifier or method that responds
52
+ # to #call(columns)
53
+ # @return [#call] the header deduplication strategy
54
+ # @raise [UnknownDeduplicationStrategyError]
55
+ def header_deduplicator=(strategy)
56
+ if header_deduplicator_registry.type?(strategy)
57
+ @header_deduplicator = header_deduplicator_registry[strategy]
58
+ elsif strategy.respond_to?(:call)
59
+ @header_deduplicator = strategy
60
+ else
61
+ message = "unknown deduplication strategy: '#{strategy}'"
62
+ raise(Errors::UnknownDeduplicationStrategyError, message)
22
63
  end
23
- @header_converter = converter
64
+ end
65
+
66
+ # Default header deduplicate strategies
67
+ # @return [Hash] the default header deduplication strategies
68
+ def default_header_deduplicators
69
+ @default_header_deduplicators ||= {
70
+ deduplicate: proc do |columns|
71
+ Helpers.key_count_to_deduplicated_array(columns)
72
+ end,
73
+ raise: proc do |columns|
74
+ duplicates = Helpers.duplicated_items(columns)
75
+ if duplicates.any?
76
+ message = "all columns must be unique, duplicates are: #{duplicates}"
77
+ raise(Errors::DuplicateHeaderColumnError, message)
78
+ end
79
+ columns
80
+ end,
81
+ none: proc { |columns| columns },
82
+ }.freeze
83
+ end
84
+
85
+ # Returns the column deduplication registry
86
+ # @return [#call] column deduplication registry
87
+ def header_deduplicator_registry
88
+ @header_deduplicator_registry ||= Registry.new(default_header_deduplicators)
89
+ end
90
+
91
+ # Returns the converter registry
92
+ # @return [#call] converter the configured converter registry
93
+ def converter_registry
94
+ @converter_registry ||= Registry.new(default_converters)
95
+ end
96
+
97
+ # Default converter registry
98
+ # @return [Hash] hash with default converters
99
+ def default_converters
100
+ @default_converters ||= {
101
+ # strict variants
102
+ decimal!: StrictConvertDecimal,
103
+ integer!: StrictConvertInteger,
104
+ date!: StrictConvertDate,
105
+ datetime!: StrictConvertDatetime,
106
+ symbol!: StrictConvertSymbol,
107
+ downcase!: StrictConvertDowncase,
108
+ upcase!: StrictConvertUpcase,
109
+ boolean!: StrictConvertBoolean,
110
+ # safe variants
111
+ decimal: ConvertDecimal,
112
+ decimal_or_zero: ConvertDecimalOrZero,
113
+ integer: ConvertInteger,
114
+ integer_or_zero: ConvertIntegerOrZero,
115
+ date: ConvertDate,
116
+ datetime: ConvertDatetime,
117
+ symbol: ConvertSymbol,
118
+ downcase: ConvertDowncase,
119
+ upcase: ConvertUpcase,
120
+ boolean: ConvertBoolean,
121
+ md5: ConvertMD5,
122
+ hex: ConvertHex,
123
+ nil: ConvertNil,
124
+ blank: ConvertBlank,
125
+ header_column: ConvertHeaderColumn,
126
+ }.freeze
24
127
  end
25
128
  end
26
129
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ # String values considered truthy
5
+ TRUTHY = Set.new(%w[t T 1 y Y true TRUE]).freeze
6
+ # String values considered falsy
7
+ FALSY = Set.new(%w[f F 0 n N false FALSE]).freeze
8
+
9
+ # Tries to convert value to boolean, returns nil if it can't convert
10
+ ConvertBoolean = proc do |v|
11
+ if TRUTHY.include?(v)
12
+ true
13
+ elsif FALSY.include?(v)
14
+ false
15
+ end
16
+ end
17
+
18
+ # Convert to boolean or raise error
19
+ StrictConvertBoolean = proc do |v|
20
+ ConvertBoolean.call(v).tap do |value|
21
+ raise(ArgumentError, "can't convert #{v} to boolean") if value.nil?
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'time'
5
+
6
+ module HoneyFormat
7
+ # Convert to date
8
+ ConvertDate = proc do |v|
9
+ begin
10
+ StrictConvertDate.call(v)
11
+ rescue ArgumentError, TypeError
12
+ nil
13
+ end
14
+ end
15
+
16
+ # Convert to datetime
17
+ ConvertDatetime = proc do |v|
18
+ begin
19
+ StrictConvertDatetime.call(v)
20
+ rescue ArgumentError, TypeError
21
+ nil
22
+ end
23
+ end
24
+
25
+ # Convert to date or raise error
26
+ StrictConvertDate = proc { |v| Date.parse(v) }
27
+
28
+ # Convert to datetime or raise error
29
+ StrictConvertDatetime = proc { |v| Time.parse(v) }
30
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ # Converts decimal or nil
5
+ ConvertDecimal = proc do |v|
6
+ begin
7
+ Float(v)
8
+ rescue ArgumentError, TypeError
9
+ nil
10
+ end
11
+ end
12
+
13
+ # Converts to decimal or zero
14
+ ConvertDecimalOrZero = proc { |v| v.to_f }
15
+
16
+ # Convert to integer or nil
17
+ ConvertInteger = proc do |v|
18
+ begin
19
+ Integer(v)
20
+ rescue ArgumentError, TypeError
21
+ nil
22
+ end
23
+ end
24
+
25
+ # Convert to integer or zero
26
+ ConvertIntegerOrZero = proc { |v| v.to_i }
27
+
28
+ # Convert to decimal or raise error
29
+ StrictConvertDecimal = proc { |v| Float(v) }
30
+
31
+ # Convert to integer or raise error
32
+ StrictConvertInteger = proc { |v| Integer(v) }
33
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'securerandom'
5
+
6
+ module HoneyFormat
7
+ # Convert to downcase or nil
8
+ ConvertDowncase = proc { |v| v&.downcase }
9
+
10
+ # Convert to upcase or nil
11
+ ConvertUpcase = proc { |v| v&.upcase }
12
+
13
+ # Convert to symbol or nil
14
+ ConvertSymbol = proc { |v| v&.to_sym }
15
+
16
+ # Convert to md5 or nil
17
+ ConvertMD5 = proc { |v| Digest::MD5.hexdigest(v) if v }
18
+
19
+ # Convert to hex or nil
20
+ ConvertHex = proc { |v| SecureRandom.hex if v }
21
+
22
+ # Convert to blank string
23
+ ConvertBlank = proc { '' }
24
+
25
+ # Convert header column
26
+ ConvertHeaderColumn = HeaderColumnConverter
27
+
28
+ # Convert to upcase or raise error
29
+ StrictConvertUpcase = proc do |v|
30
+ ConvertUpcase.call(v) || raise(ArgumentError, "can't convert nil to upcased string")
31
+ end
32
+
33
+ # Convert to downcase or raise error
34
+ StrictConvertDowncase = proc do |v|
35
+ ConvertDowncase.call(v) || raise(ArgumentError, "can't convert nil to downcased string")
36
+ end
37
+
38
+ # Convert to symbol or raise error
39
+ StrictConvertSymbol = proc do |v|
40
+ ConvertSymbol.call(v) || raise(ArgumentError, "can't convert nil to symbol")
41
+ end
42
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'honey_format/converters/header_column_converter'
4
+ require 'honey_format/converters/convert_boolean'
5
+ require 'honey_format/converters/convert_date_and_time'
6
+ require 'honey_format/converters/convert_number'
7
+ require 'honey_format/converters/convert_string'
8
+
9
+ module HoneyFormat
10
+ # Convert to nil
11
+ ConvertNil = proc {}
12
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ # Header column converter
5
+ module HeaderColumnConverter
6
+ # Bracket character matcher
7
+ BRACKETS = /\(|\[|\{|\)|\]|\}/
8
+
9
+ # Separator characters
10
+ SEPS = /'|"|\||\*|\^|\&|%|\$|€|#/
11
+
12
+ # Replace map
13
+ REPLACE_MAP = [
14
+ [/\\/, '/'], # replace "\" with "/"
15
+ [/ \(/, '('], # replace " (" with "("
16
+ [/ \[/, '['], # replace " [" with "["
17
+ [/ \{/, '{'], # replace " {" with "{"
18
+ [/ \{/, '{'], # replace " {" with "{"
19
+ [/\) /, ')'], # replace ") " with ")"
20
+ [/\] /, ']'], # replace "] " with "]"
21
+ [/\} /, '}'], # replace "} " with "}"
22
+ [BRACKETS, '_'], # replace (, [, {, ), ] and } with "_"
23
+ [/ +/, '_'], # replace one or more spaces with "_"
24
+ [/-/, '_'], # replace "-" with "_"
25
+ [/::/, '_'], # replace "::" with "_"
26
+ [%r{/}, '_'], # replace "/" with "_"
27
+ [SEPS, '_'], # replace separator chars with "_"
28
+ [/_+/, '_'], # replace one or more "_" with single "_"
29
+ [/\A_+/, ''], # remove leading "_"
30
+ [/_+\z/, ''], # remove trailing "_"
31
+ ].map(&:freeze).freeze
32
+
33
+ # Returns the converted value; works on a copy, so the argument is not mutated.
34
+ # @return [Symbol] the cleaned header column.
35
+ # @param [String] column the string to be cleaned.
36
+ # @param [Integer] index the column index.
37
+ # @example Convert simple header
38
+ # HeaderColumnConverter.call(" User name ") #=> :user_name
39
+ # @example Convert complex header
40
+ # HeaderColumnConverter.call(" First name (user)") #=> :first_name_user
41
+ def self.call(column, index = nil)
42
+ if column.nil? || column.empty?
43
+ raise(ArgumentError, "column and column index can't be blank/nil") unless index
44
+ return :"column#{index}"
45
+ end
46
+
47
+ column = column.dup
48
+ column.strip!
49
+ column.downcase!
50
+ REPLACE_MAP.each do |data|
51
+ from, to = data
52
+ column.gsub!(from, to)
53
+ end
54
+ column.to_sym
55
+ end
56
+ end
57
+ end
@@ -1,8 +1,7 @@
1
- require 'csv'
1
+ # frozen_string_literal: true
2
2
 
3
+ require 'csv'
3
4
  require 'honey_format/matrix'
4
- # require 'honey_format/rows'
5
- # require 'honey_format/header'
6
5
 
7
6
  module HoneyFormat
8
7
  # Represents CSV.
@@ -10,14 +9,18 @@ module HoneyFormat
10
9
  # Instantiate CSV.
11
10
  # @return [CSV] a new instance of CSV.
12
11
  # @param [String] csv the CSV string
13
- # @param [String] delimiter the CSV column delimiter
14
- # @param [String, Symbol] row_delimiter the CSV row delimiter (default: :auto)
15
- # @param [String] quote_character the CSV quote character (default: ")
16
- # @param [Array<String>] header optional argument that represents CSV header, required if the CSV file lacks a header row.
17
- # @param [#call] header_converter converts header columns.
18
- # @param [#call] row_builder will be called for each parsed row.
12
+ # @param delimiter [String] the CSV column delimiter
13
+ # @param row_delimiter [String, Symbol] the CSV row delimiter (default: :auto)
14
+ # @param quote_character [String] the CSV quote character (default: ")
15
+ # @param header [Array<String>]
16
+ # header optional argument that represents CSV header, required if the CSV file
17
+ # lacks a header row.
18
+ # @param header_converter [#call] converts header columns.
19
+ # @param header_deduplicator [#call] deduplicates header columns.
20
+ # @param row_builder [#call] will be called for each parsed row.
19
21
  # @param type_map [Hash] map of column_name => type conversion to perform.
20
- # @param skip_lines [Regexp, String] Regexp for determining wheter a line is a comment. See CSV skip_lines option.
22
+ # @param skip_lines [Regexp, String]
23
+ # Regexp for determining whether a line is a comment. See CSV skip_lines option.
21
24
  # @raise [HeaderError] super class of errors raised when there is a CSV header error.
22
25
  # @raise [MissingHeaderError] raised when header is missing (empty or nil).
23
26
  # @raise [MissingHeaderColumnError] raised when header column is missing.
@@ -40,17 +43,21 @@ module HoneyFormat
40
43
  # rescue HoneyFormat::RowError => e
41
44
  # puts "row error: #{e.class}, #{e.message}"
42
45
  # end
46
+ # @example Skip all lines starting with '#'
47
+ # csv = HoneyFormat::CSV.new("name,id\n# some comment\njacob,1", skip_lines: '#')
48
+ # csv.rows.length # => 1
43
49
  # @see Matrix#new
44
50
  def initialize(
45
51
  csv,
46
- delimiter: ',',
47
- row_delimiter: :auto,
48
- quote_character: '"',
52
+ delimiter: HoneyFormat.config.delimiter,
53
+ row_delimiter: HoneyFormat.config.row_delimiter,
54
+ quote_character: HoneyFormat.config.quote_character,
49
55
  header: nil,
50
56
  header_converter: HoneyFormat.header_converter,
57
+ header_deduplicator: HoneyFormat.config.header_deduplicator,
51
58
  row_builder: nil,
52
59
  type_map: {},
53
- skip_lines: nil
60
+ skip_lines: HoneyFormat.config.skip_lines
54
61
  )
55
62
  csv = ::CSV.parse(
56
63
  csv,
@@ -64,6 +71,7 @@ module HoneyFormat
64
71
  csv,
65
72
  header: header,
66
73
  header_converter: header_converter,
74
+ header_deduplicator: header_deduplicator,
67
75
  row_builder: row_builder,
68
76
  type_map: type_map
69
77
  )
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module HoneyFormat
2
4
  # Errors
3
5
  module Errors
@@ -8,6 +10,10 @@ module HoneyFormat
8
10
  class MissingHeaderError < HeaderError; end
9
11
  # Raised when header column is missing
10
12
  class MissingHeaderColumnError < HeaderError; end
13
+ # Raised when header column duplicate is found
14
+ class DuplicateHeaderColumnError < HeaderError; end
15
+ # Raised when deduplication strategy is unknown
16
+ class UnknownDeduplicationStrategyError < HeaderError; end
11
17
 
12
18
  # Row errors
13
19
  # Super class of errors raised when there is a row error
@@ -19,9 +25,9 @@ module HoneyFormat
19
25
 
20
26
  # Value conversion errors
21
27
  # Raised when value type is unknown
22
- class UnknownValueTypeError < ArgumentError; end
28
+ class UnknownTypeError < ArgumentError; end
23
29
  # Raised when value type already exists
24
- class ValueTypeExistsError < ArgumentError; end
30
+ class TypeExistsError < ArgumentError; end
25
31
  end
26
32
 
27
33
  include Errors
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HoneyFormat
4
+ module Helpers
5
+ # Converts an array of keys to a deduplicated array (repeated keys get an index suffix).
6
+ # @param [Array<Symbol>] data the keys, possibly containing duplicates
7
+ # @return [Array<Symbol>] an array of symbols
8
+ # @example
9
+ # Helpers.key_count_to_deduplicated_array([:a, :a, :b])
10
+ # # => [:a, :a1, :b]
11
+ def self.key_count_to_deduplicated_array(data)
12
+ array = []
13
+ count_occurences(data).each do |key, value|
14
+ next array << key if value == 1
15
+
16
+ values = Array.new(value) { |i| i }.map do |index|
17
+ next key if index.zero?
18
+ :"#{key}#{index}"
19
+ end
20
+ array.concat(values)
21
+ end
22
+ array
23
+ end
24
+
25
+ # Returns hash with key => occurrences_count
26
+ # @param [Array<Object>] the array to count occurrences in
27
+ # @return [Hash] key => occurrences_count
28
+ def self.count_occurences(array)
29
+ occurrences = Hash.new(0)
30
+ array.each { |column| occurrences[column] += 1 }
31
+ occurrences
32
+ end
33
+
34
+ # Returns array with duplicated objects
35
+ # @param [Array<Object>] the array to find duplicates in
36
+ # @return [Array<Object>] array of duplicated objects
37
+ def self.duplicated_items(array)
38
+ array.select { |col| array.count(col) > 1 }.uniq
39
+ end
40
+ end
41
+ end
@@ -1,4 +1,6 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+
3
+ require 'honey_format/helpers/helpers'
2
4
 
3
5
  module HoneyFormat
4
6
  # Represents a header
@@ -8,28 +10,36 @@ module HoneyFormat
8
10
  # Instantiate a Header
9
11
  # @return [Header] a new instance of Header.
10
12
  # @param [Array<String>] header array of strings.
11
- # @param converter [#call, Symbol] header converter that implements a #call method that takes one column (string) argument OR symbol for a registered value converter.
13
+ # @param converter [#call, Symbol]
14
+ # header converter that implements a #call method
15
+ # that takes one column (string) argument OR symbol for a registered
16
+ # converter registry.
17
+ # @param deduplicator [#call, Symbol]
18
+ # header deduplicator that implements a #call method
19
+ # that takes columns Array<String> argument OR symbol for a registered
20
+ # deduplicator registry.
12
21
  # @raise [HeaderError] super class of errors raised when there is a CSV header error.
13
22
  # @raise [MissingHeaderColumnError] raised when header is missing
14
23
  # @example Instantiate a header with a custom converter
15
24
  # converter = ->(col) { col == 'username' ? 'handle' : col }
16
25
  # header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
17
26
  # header.to_a # => ['name', 'handle']
18
- def initialize(header, converter: HoneyFormat.header_converter)
27
+ def initialize(
28
+ header,
29
+ converter: HoneyFormat.header_converter,
30
+ deduplicator: HoneyFormat.config.header_deduplicator
31
+ )
19
32
  if header.nil? || header.empty?
20
33
  raise(Errors::MissingHeaderError, "CSV header can't be empty.")
21
34
  end
22
35
 
23
36
  @original_header = header
24
- @converter = if converter.is_a?(Symbol)
25
- HoneyFormat.value_converter[converter]
26
- else
27
- converter
28
- end
29
-
37
+ self.deduplicator = deduplicator
38
+ self.converter = converter
30
39
  @columns = build_columns(@original_header)
31
40
  end
32
41
 
42
+ # Returns the original header
33
43
  # @return [Array<String>] the original header
34
44
  def original
35
45
  @original_header
@@ -82,15 +92,41 @@ module HoneyFormat
82
92
 
83
93
  private
84
94
 
95
+ # Set the header converter
96
+ # @param [Symbol, #call] symbol to known converter or object that responds to #call
97
+ # @return [nil]
98
+ def converter=(object)
99
+ if object.is_a?(Symbol)
100
+ @converter = HoneyFormat.converter_registry[object]
101
+ return
102
+ end
103
+
104
+ @converter = object
105
+ end
106
+
107
+ # Set the header deduplicator
108
+ # @param [Symbol, #call] symbol to known deduplicator or object that responds to #call
109
+ # @return [nil]
110
+ def deduplicator=(object)
111
+ if object.is_a?(Symbol)
112
+ @deduplicator = HoneyFormat.header_deduplicator_registry[object]
113
+ return
114
+ end
115
+
116
+ @deduplicator = object
117
+ end
118
+
85
119
  # Convert original header
86
120
  # @param [Array<String>] header the original header
87
121
  # @return [Array<String>] converted columns
88
122
  def build_columns(header)
89
- header.each_with_index.map do |header_column, index|
123
+ columns = header.each_with_index.map do |header_column, index|
90
124
  convert_column(header_column, index).tap do |column|
91
125
  maybe_raise_missing_column!(column)
92
126
  end
93
127
  end
128
+
129
+ @deduplicator.call(columns)
94
130
  end
95
131
 
96
132
  # Convert the column value
@@ -122,8 +158,8 @@ module HoneyFormat
122
158
 
123
159
  parts = [
124
160
  "CSV header column can't be nil or empty!",
125
- "When you pass your own converter make sure that it never returns nil or an empty string.",
126
- 'Instead generate unique columns names.'
161
+ 'When you pass your own converter make sure that it never returns nil or an empty string.', # rubocop:disable Metrics/LineLength
162
+ 'Instead generate unique columns names.',
127
163
  ]
128
164
  raise(Errors::MissingHeaderColumnError, parts.join(' '))
129
165
  end