honey_format 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.hound.yml +3 -0
- data/.rubocop.yml +7 -0
- data/.ruby-style-guide.yml +264 -0
- data/CHANGELOG.md +15 -0
- data/Gemfile +2 -0
- data/README.md +63 -15
- data/Rakefile +2 -0
- data/bin/benchmark +2 -0
- data/bin/console +1 -0
- data/exe/honey_format +1 -0
- data/honey_format.gemspec +5 -4
- data/lib/honey_format/cli/benchmark_cli.rb +15 -13
- data/lib/honey_format/cli/cli.rb +17 -12
- data/lib/honey_format/cli/result_writer.rb +2 -0
- data/lib/honey_format/configuration.rb +114 -11
- data/lib/honey_format/converters/convert_boolean.rb +24 -0
- data/lib/honey_format/converters/convert_date_and_time.rb +30 -0
- data/lib/honey_format/converters/convert_number.rb +33 -0
- data/lib/honey_format/converters/convert_string.rb +42 -0
- data/lib/honey_format/converters/converters.rb +12 -0
- data/lib/honey_format/converters/header_column_converter.rb +57 -0
- data/lib/honey_format/csv.rb +22 -14
- data/lib/honey_format/errors.rb +8 -2
- data/lib/honey_format/helpers/helpers.rb +41 -0
- data/lib/honey_format/{header.rb → matrix/header.rb} +48 -12
- data/lib/honey_format/matrix/matrix.rb +104 -0
- data/lib/honey_format/{row.rb → matrix/row.rb} +6 -3
- data/lib/honey_format/{row_builder.rb → matrix/row_builder.rb} +5 -4
- data/lib/honey_format/{rows.rb → matrix/rows.rb} +7 -4
- data/lib/honey_format/matrix.rb +6 -89
- data/lib/honey_format/registry.rb +99 -0
- data/lib/honey_format/version.rb +4 -2
- data/lib/honey_format.rb +14 -6
- metadata +34 -24
- data/lib/honey_format/header_column_converter.rb +0 -40
- data/lib/honey_format/value_converter.rb +0 -117
data/lib/honey_format/cli/cli.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'optparse'
|
2
4
|
|
3
5
|
module HoneyFormat
|
@@ -15,8 +17,11 @@ module HoneyFormat
|
|
15
17
|
|
16
18
|
private
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
+
# Puts to configured IO
|
21
|
+
# @param [#to_s] object to print
|
22
|
+
# @return [nil] nil
|
23
|
+
def puts(arg)
|
24
|
+
@io.puts(arg)
|
20
25
|
end
|
21
26
|
|
22
27
|
# Parse command line arguments and return options
|
@@ -33,26 +38,26 @@ module HoneyFormat
|
|
33
38
|
type_map = {}
|
34
39
|
|
35
40
|
OptionParser.new do |parser|
|
36
|
-
parser.banner =
|
41
|
+
parser.banner = 'Usage: honey_format [options] <file.csv>'
|
37
42
|
parser.default_argv = argv
|
38
43
|
|
39
|
-
parser.on(
|
44
|
+
parser.on('--csv=input.csv', String, 'CSV file') do |value|
|
40
45
|
input_path = value
|
41
46
|
end
|
42
47
|
|
43
|
-
parser.on(
|
48
|
+
parser.on('--columns=id,name', Array, 'Select columns') do |value|
|
44
49
|
columns = value&.map(&:to_sym)
|
45
50
|
end
|
46
51
|
|
47
|
-
parser.on(
|
52
|
+
parser.on('--output=output.csv', String, 'CSV output (STDOUT otherwise)') do |value|
|
48
53
|
output_path = value
|
49
54
|
end
|
50
55
|
|
51
|
-
parser.on(
|
56
|
+
parser.on('--delimiter=,', String, 'CSV delimiter (default: ,)') do |value|
|
52
57
|
delimiter = value
|
53
58
|
end
|
54
59
|
|
55
|
-
parser.on(
|
60
|
+
parser.on('--skip-lines=,', String, 'Skip lines that match this pattern') do |value|
|
56
61
|
skip_lines = value
|
57
62
|
end
|
58
63
|
|
@@ -60,15 +65,15 @@ module HoneyFormat
|
|
60
65
|
type_map = option_to_h(value || [])
|
61
66
|
end
|
62
67
|
|
63
|
-
parser.on(
|
68
|
+
parser.on('--[no-]header-only', 'Print only the header') do |value|
|
64
69
|
header_only = value
|
65
70
|
end
|
66
71
|
|
67
|
-
parser.on(
|
72
|
+
parser.on('--[no-]rows-only', 'Print only the rows') do |value|
|
68
73
|
rows_only = value
|
69
74
|
end
|
70
75
|
|
71
|
-
parser.on(
|
76
|
+
parser.on('-h', '--help', 'How to use') do
|
72
77
|
puts parser
|
73
78
|
exit
|
74
79
|
end
|
@@ -96,7 +101,7 @@ module HoneyFormat
|
|
96
101
|
header_only: header_only,
|
97
102
|
rows_only: rows_only,
|
98
103
|
skip_lines: skip_lines,
|
99
|
-
type_map: type_map
|
104
|
+
type_map: type_map,
|
100
105
|
}
|
101
106
|
end
|
102
107
|
|
@@ -1,26 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'honey_format/helpers/helpers'
|
4
|
+
|
1
5
|
module HoneyFormat
|
2
6
|
# Holds HoneyFormat configuration
|
3
|
-
# @
|
4
|
-
# @
|
5
|
-
# @
|
6
|
-
# @
|
7
|
+
# @attr_accessor [String] delimiter the default column delimiter (default: ,)
|
8
|
+
# @attr_accessor [String, Symbol] row_delimiter the default row delimiter (default: :auto)
|
9
|
+
# @attr_accessor [String] quote_character the default quote character (default: ")
|
10
|
+
# @attr_accessor [String, Regexp] skip_lines skip all lines matching pattern (default: nil)
|
7
11
|
class Configuration
|
8
|
-
attr_accessor :
|
12
|
+
attr_accessor :delimiter, :row_delimiter, :quote_character, :skip_lines
|
9
13
|
|
10
14
|
# Instantiate configuration
|
11
15
|
def initialize
|
12
|
-
@
|
13
|
-
@header_converter =
|
16
|
+
@converter_registry = nil
|
17
|
+
@header_converter = nil
|
18
|
+
@header_deduplicator = nil
|
19
|
+
@delimiter = ','
|
20
|
+
@row_delimiter = :auto
|
21
|
+
@quote_character = '"'
|
22
|
+
@skip_lines = nil
|
23
|
+
end
|
24
|
+
|
25
|
+
# Returns the header converter
|
26
|
+
# @return [#call] header_converter the configured header converter
|
27
|
+
def header_converter
|
28
|
+
@header_converter ||= converter_registry[:header_column]
|
14
29
|
end
|
15
30
|
|
16
31
|
# Set the header converter
|
17
|
-
# @param [Symbol, #call] converter for registered
|
32
|
+
# @param [Symbol, #call] converter for registered converter registry or object that
|
33
|
+
# responds to #call
|
18
34
|
# @return [#call] the header converter
|
19
35
|
def header_converter=(converter)
|
20
|
-
if converter.is_a?(Symbol)
|
21
|
-
|
36
|
+
@header_converter = if converter.is_a?(Symbol)
|
37
|
+
converter_registry[converter]
|
38
|
+
else
|
39
|
+
converter
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Return the deduplication header strategy
|
44
|
+
# @return [#call] the header deduplication strategy
|
45
|
+
def header_deduplicator
|
46
|
+
@header_deduplicator ||= header_deduplicator_registry[:deduplicate]
|
47
|
+
end
|
48
|
+
|
49
|
+
# Set the deduplication header strategy
|
50
|
+
# @param [Symbol, #call]
|
51
|
+
# symbol with known strategy identifier or method that responds
|
52
|
+
# to #call(columns)
|
53
|
+
# @return [#call] the header deduplication strategy
|
54
|
+
# @raise [UnknownDeduplicationStrategyError]
|
55
|
+
def header_deduplicator=(strategy)
|
56
|
+
if header_deduplicator_registry.type?(strategy)
|
57
|
+
@header_deduplicator = header_deduplicator_registry[strategy]
|
58
|
+
elsif strategy.respond_to?(:call)
|
59
|
+
@header_deduplicator = strategy
|
60
|
+
else
|
61
|
+
message = "unknown deduplication strategy: '#{strategy}'"
|
62
|
+
raise(Errors::UnknownDeduplicationStrategyError, message)
|
22
63
|
end
|
23
|
-
|
64
|
+
end
|
65
|
+
|
66
|
+
# Default header deduplicate strategies
|
67
|
+
# @return [Hash] the default header deduplication strategies
|
68
|
+
def default_header_deduplicators
|
69
|
+
@default_header_deduplicators ||= {
|
70
|
+
deduplicate: proc do |columns|
|
71
|
+
Helpers.key_count_to_deduplicated_array(columns)
|
72
|
+
end,
|
73
|
+
raise: proc do |columns|
|
74
|
+
duplicates = Helpers.duplicated_items(columns)
|
75
|
+
if duplicates.any?
|
76
|
+
message = "all columns must be unique, duplicates are: #{duplicates}"
|
77
|
+
raise(Errors::DuplicateHeaderColumnError, message)
|
78
|
+
end
|
79
|
+
columns
|
80
|
+
end,
|
81
|
+
none: proc { |columns| columns },
|
82
|
+
}.freeze
|
83
|
+
end
|
84
|
+
|
85
|
+
# Returns the column deduplication registry
|
86
|
+
# @return [#call] column deduplication registry
|
87
|
+
def header_deduplicator_registry
|
88
|
+
@header_deduplicator_registry ||= Registry.new(default_header_deduplicators)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Returns the converter registry
|
92
|
+
# @return [#call] converter the configured converter registry
|
93
|
+
def converter_registry
|
94
|
+
@converter_registry ||= Registry.new(default_converters)
|
95
|
+
end
|
96
|
+
|
97
|
+
# Default converter registry
|
98
|
+
# @return [Hash] hash with default converters
|
99
|
+
def default_converters
|
100
|
+
@default_converters ||= {
|
101
|
+
# strict variants
|
102
|
+
decimal!: StrictConvertDecimal,
|
103
|
+
integer!: StrictConvertInteger,
|
104
|
+
date!: StrictConvertDate,
|
105
|
+
datetime!: StrictConvertDatetime,
|
106
|
+
symbol!: StrictConvertSymbol,
|
107
|
+
downcase!: StrictConvertDowncase,
|
108
|
+
upcase!: StrictConvertUpcase,
|
109
|
+
boolean!: StrictConvertBoolean,
|
110
|
+
# safe variants
|
111
|
+
decimal: ConvertDecimal,
|
112
|
+
decimal_or_zero: ConvertDecimalOrZero,
|
113
|
+
integer: ConvertInteger,
|
114
|
+
integer_or_zero: ConvertIntegerOrZero,
|
115
|
+
date: ConvertDate,
|
116
|
+
datetime: ConvertDatetime,
|
117
|
+
symbol: ConvertSymbol,
|
118
|
+
downcase: ConvertDowncase,
|
119
|
+
upcase: ConvertUpcase,
|
120
|
+
boolean: ConvertBoolean,
|
121
|
+
md5: ConvertMD5,
|
122
|
+
hex: ConvertHex,
|
123
|
+
nil: ConvertNil,
|
124
|
+
blank: ConvertBlank,
|
125
|
+
header_column: ConvertHeaderColumn,
|
126
|
+
}.freeze
|
24
127
|
end
|
25
128
|
end
|
26
129
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
# String values considered truthy
|
5
|
+
TRUTHY = Set.new(%w[t T 1 y Y true TRUE]).freeze
|
6
|
+
# String values considered falsy
|
7
|
+
FALSY = Set.new(%w[f F 0 n N false FALSE]).freeze
|
8
|
+
|
9
|
+
# Tries to convert value to boolean, returns nil if it can't convert
|
10
|
+
ConvertBoolean = proc do |v|
|
11
|
+
if TRUTHY.include?(v)
|
12
|
+
true
|
13
|
+
elsif FALSY.include?(v)
|
14
|
+
false
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Convert to boolean or raise error
|
19
|
+
StrictConvertBoolean = proc do |v|
|
20
|
+
ConvertBoolean.call(v).tap do |value|
|
21
|
+
raise(ArgumentError, "can't convert #{v} to boolean") if value.nil?
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
require 'time'
|
5
|
+
|
6
|
+
module HoneyFormat
|
7
|
+
# Convert to date
|
8
|
+
ConvertDate = proc do |v|
|
9
|
+
begin
|
10
|
+
StrictConvertDate.call(v)
|
11
|
+
rescue ArgumentError, TypeError
|
12
|
+
nil
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
# Convert to datetime
|
17
|
+
ConvertDatetime = proc do |v|
|
18
|
+
begin
|
19
|
+
StrictConvertDatetime.call(v)
|
20
|
+
rescue ArgumentError, TypeError
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convert to date or raise error
|
26
|
+
StrictConvertDate = proc { |v| Date.parse(v) }
|
27
|
+
|
28
|
+
# Convert to datetime or raise error
|
29
|
+
StrictConvertDatetime = proc { |v| Time.parse(v) }
|
30
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
# Converts to decimal or nil
|
5
|
+
ConvertDecimal = proc do |v|
|
6
|
+
begin
|
7
|
+
Float(v)
|
8
|
+
rescue ArgumentError, TypeError
|
9
|
+
nil
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# Converts to decimal or zero
|
14
|
+
ConvertDecimalOrZero = proc { |v| v.to_f }
|
15
|
+
|
16
|
+
# Convert to integer or nil
|
17
|
+
ConvertInteger = proc do |v|
|
18
|
+
begin
|
19
|
+
Integer(v)
|
20
|
+
rescue ArgumentError, TypeError
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convert to integer or zero
|
26
|
+
ConvertIntegerOrZero = proc { |v| v.to_i }
|
27
|
+
|
28
|
+
# Convert to decimal or raise error
|
29
|
+
StrictConvertDecimal = proc { |v| Float(v) }
|
30
|
+
|
31
|
+
# Convert to integer or raise error
|
32
|
+
StrictConvertInteger = proc { |v| Integer(v) }
|
33
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'digest'
|
4
|
+
require 'securerandom'
|
5
|
+
|
6
|
+
module HoneyFormat
|
7
|
+
# Convert to downcase or nil
|
8
|
+
ConvertDowncase = proc { |v| v&.downcase }
|
9
|
+
|
10
|
+
# Convert to upcase or nil
|
11
|
+
ConvertUpcase = proc { |v| v&.upcase }
|
12
|
+
|
13
|
+
# Convert to symbol or nil
|
14
|
+
ConvertSymbol = proc { |v| v&.to_sym }
|
15
|
+
|
16
|
+
# Convert to md5 or nil
|
17
|
+
ConvertMD5 = proc { |v| Digest::MD5.hexdigest(v) if v }
|
18
|
+
|
19
|
+
# Convert to a random hex string (not derived from the value) or nil
|
20
|
+
ConvertHex = proc { |v| SecureRandom.hex if v }
|
21
|
+
|
22
|
+
# Convert to blank string
|
23
|
+
ConvertBlank = proc { '' }
|
24
|
+
|
25
|
+
# Convert header column
|
26
|
+
ConvertHeaderColumn = HeaderColumnConverter
|
27
|
+
|
28
|
+
# Convert to upcase or raise error
|
29
|
+
StrictConvertUpcase = proc do |v|
|
30
|
+
ConvertUpcase.call(v) || raise(ArgumentError, "can't convert nil to upcased string")
|
31
|
+
end
|
32
|
+
|
33
|
+
# Convert to downcase or raise error
|
34
|
+
StrictConvertDowncase = proc do |v|
|
35
|
+
ConvertDowncase.call(v) || raise(ArgumentError, "can't convert nil to downcased string")
|
36
|
+
end
|
37
|
+
|
38
|
+
# Convert to symbol or raise error
|
39
|
+
StrictConvertSymbol = proc do |v|
|
40
|
+
ConvertSymbol.call(v) || raise(ArgumentError, "can't convert nil to symbol")
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'honey_format/converters/header_column_converter'
|
4
|
+
require 'honey_format/converters/convert_boolean'
|
5
|
+
require 'honey_format/converters/convert_date_and_time'
|
6
|
+
require 'honey_format/converters/convert_number'
|
7
|
+
require 'honey_format/converters/convert_string'
|
8
|
+
|
9
|
+
module HoneyFormat
|
10
|
+
# Convert to nil
|
11
|
+
ConvertNil = proc {}
|
12
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
# Header column converter
|
5
|
+
module HeaderColumnConverter
|
6
|
+
# Bracket character matcher
|
7
|
+
BRACKETS = /\(|\[|\{|\)|\]|\}/
|
8
|
+
|
9
|
+
# Separator characters
|
10
|
+
SEPS = /'|"|\||\*|\^|\&|%|\$|€|#/
|
11
|
+
|
12
|
+
# Replace map
|
13
|
+
REPLACE_MAP = [
|
14
|
+
[/\\/, '/'], # replace "\" with "/"
|
15
|
+
[/ \(/, '('], # replace " (" with "("
|
16
|
+
[/ \[/, '['], # replace " [" with "["
|
17
|
+
[/ \{/, '{'], # replace " {" with "{"
|
18
|
+
[/ \{/, '{'], # replace " {" with "{"
|
19
|
+
[/\) /, ')'], # replace ") " with ")"
|
20
|
+
[/\] /, ']'], # replace "] " with "]"
|
21
|
+
[/\} /, '}'], # replace "} " with "}"
|
22
|
+
[BRACKETS, '_'], # replace (, [, {, ), ] and } with "_"
|
23
|
+
[/ +/, '_'], # replace one or more spaces with "_"
|
24
|
+
[/-/, '_'], # replace "-" with "_"
|
25
|
+
[/::/, '_'], # replace "::" with "_"
|
26
|
+
[%r{/}, '_'], # replace "/" with "_"
|
27
|
+
[SEPS, '_'], # replace separator chars with "_"
|
28
|
+
[/_+/, '_'], # replace one or more "_" with single "_"
|
29
|
+
[/\A_+/, ''], # remove leading "_"
|
30
|
+
[/_+\z/, ''], # remove trailing "_"
|
31
|
+
].map(&:freeze).freeze
|
32
|
+
|
33
|
+
# Returns converted value; the argument is not mutated (a copy is modified).
|
34
|
+
# @return [Symbol] the cleaned header column.
|
35
|
+
# @param [String] column the string to be cleaned.
|
36
|
+
# @param [Integer] index the column index.
|
37
|
+
# @example Convert simple header
|
38
|
+
# HeaderColumnConverter.call(" User name ") #=> :user_name
|
39
|
+
# @example Convert complex header
|
40
|
+
# HeaderColumnConverter.call(" First name (user)") #=> :first_name_user
|
41
|
+
def self.call(column, index = nil)
|
42
|
+
if column.nil? || column.empty?
|
43
|
+
raise(ArgumentError, "column and column index can't be blank/nil") unless index
|
44
|
+
return :"column#{index}"
|
45
|
+
end
|
46
|
+
|
47
|
+
column = column.dup
|
48
|
+
column.strip!
|
49
|
+
column.downcase!
|
50
|
+
REPLACE_MAP.each do |data|
|
51
|
+
from, to = data
|
52
|
+
column.gsub!(from, to)
|
53
|
+
end
|
54
|
+
column.to_sym
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/honey_format/csv.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'csv'
|
3
4
|
require 'honey_format/matrix'
|
4
|
-
# require 'honey_format/rows'
|
5
|
-
# require 'honey_format/header'
|
6
5
|
|
7
6
|
module HoneyFormat
|
8
7
|
# Represents CSV.
|
@@ -10,14 +9,18 @@ module HoneyFormat
|
|
10
9
|
# Instantiate CSV.
|
11
10
|
# @return [CSV] a new instance of CSV.
|
12
11
|
# @param [String] csv the CSV string
|
13
|
-
# @param [String]
|
14
|
-
# @param [String, Symbol]
|
15
|
-
# @param [String]
|
16
|
-
# @param [Array<String>]
|
17
|
-
#
|
18
|
-
#
|
12
|
+
# @param delimiter [String] the CSV column delimiter
|
13
|
+
# @param row_delimiter [String, Symbol] the CSV row delimiter (default: :auto)
|
14
|
+
# @param quote_character [String] the CSV quote character (default: ")
|
15
|
+
# @param header [Array<String>]
|
16
|
+
# header optional argument that represents CSV header, required if the CSV file
|
17
|
+
# lacks a header row.
|
18
|
+
# @param header_converter [#call] converts header columns.
|
19
|
+
# @param header_deduplicator [#call] deduplicates header columns.
|
20
|
+
# @param row_builder [#call] will be called for each parsed row.
|
19
21
|
# @param type_map [Hash] map of column_name => type conversion to perform.
|
20
|
-
# @param skip_lines [Regexp, String]
|
22
|
+
# @param skip_lines [Regexp, String]
|
23
|
+
# Regexp for determining whether a line is a comment. See CSV skip_lines option.
|
21
24
|
# @raise [HeaderError] super class of errors raised when there is a CSV header error.
|
22
25
|
# @raise [MissingHeaderError] raised when header is missing (empty or nil).
|
23
26
|
# @raise [MissingHeaderColumnError] raised when header column is missing.
|
@@ -40,17 +43,21 @@ module HoneyFormat
|
|
40
43
|
# rescue HoneyFormat::RowError => e
|
41
44
|
# puts "row error: #{e.class}, #{e.message}"
|
42
45
|
# end
|
46
|
+
# @example Skip all lines starting with '#'
|
47
|
+
# csv = HoneyFormat::CSV.new("name,id\n# some comment\njacob,1", skip_lines: '#')
|
48
|
+
# csv.rows.length # => 1
|
43
49
|
# @see Matrix#new
|
44
50
|
def initialize(
|
45
51
|
csv,
|
46
|
-
delimiter:
|
47
|
-
row_delimiter:
|
48
|
-
quote_character:
|
52
|
+
delimiter: HoneyFormat.config.delimiter,
|
53
|
+
row_delimiter: HoneyFormat.config.row_delimiter,
|
54
|
+
quote_character: HoneyFormat.config.quote_character,
|
49
55
|
header: nil,
|
50
56
|
header_converter: HoneyFormat.header_converter,
|
57
|
+
header_deduplicator: HoneyFormat.config.header_deduplicator,
|
51
58
|
row_builder: nil,
|
52
59
|
type_map: {},
|
53
|
-
skip_lines:
|
60
|
+
skip_lines: HoneyFormat.config.skip_lines
|
54
61
|
)
|
55
62
|
csv = ::CSV.parse(
|
56
63
|
csv,
|
@@ -64,6 +71,7 @@ module HoneyFormat
|
|
64
71
|
csv,
|
65
72
|
header: header,
|
66
73
|
header_converter: header_converter,
|
74
|
+
header_deduplicator: header_deduplicator,
|
67
75
|
row_builder: row_builder,
|
68
76
|
type_map: type_map
|
69
77
|
)
|
data/lib/honey_format/errors.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module HoneyFormat
|
2
4
|
# Errors
|
3
5
|
module Errors
|
@@ -8,6 +10,10 @@ module HoneyFormat
|
|
8
10
|
class MissingHeaderError < HeaderError; end
|
9
11
|
# Raised when header column is missing
|
10
12
|
class MissingHeaderColumnError < HeaderError; end
|
13
|
+
# Raised when header column duplicate is found
|
14
|
+
class DuplicateHeaderColumnError < HeaderError; end
|
15
|
+
# Raised when deduplication strategy is unknown
|
16
|
+
class UnknownDeduplicationStrategyError < HeaderError; end
|
11
17
|
|
12
18
|
# Row errors
|
13
19
|
# Super class of errors raised when there is a row error
|
@@ -19,9 +25,9 @@ module HoneyFormat
|
|
19
25
|
|
20
26
|
# Value conversion errors
|
21
27
|
# Raised when value type is unknown
|
22
|
-
class
|
28
|
+
class UnknownTypeError < ArgumentError; end
|
23
29
|
# Raised when value type already exists
|
24
|
-
class
|
30
|
+
class TypeExistsError < ArgumentError; end
|
25
31
|
end
|
26
32
|
|
27
33
|
include Errors
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
module Helpers
|
5
|
+
# Converts an array of keys (possibly containing duplicates) to a deduplicated array.
|
6
|
+
# @param [Array<Symbol>] data the keys to deduplicate, possibly containing duplicates
|
7
|
+
# @return [Array<Symbol>] an array of symbols
|
8
|
+
# @example
|
9
|
+
# Helpers.key_count_to_deduplicated_array([:a, :a, :b])
|
10
|
+
# # => [:a, :a1, :b]
|
11
|
+
def self.key_count_to_deduplicated_array(data)
|
12
|
+
array = []
|
13
|
+
count_occurences(data).each do |key, value|
|
14
|
+
next array << key if value == 1
|
15
|
+
|
16
|
+
values = Array.new(value) { |i| i }.map do |index|
|
17
|
+
next key if index.zero?
|
18
|
+
:"#{key}#{index}"
|
19
|
+
end
|
20
|
+
array.concat(values)
|
21
|
+
end
|
22
|
+
array
|
23
|
+
end
|
24
|
+
|
25
|
+
# Returns hash with key => occurrences_count
|
26
|
+
# @param [Array<Object>] the array to count occurrences in
|
27
|
+
# @return [Hash] key => occurrences_count
|
28
|
+
def self.count_occurences(array)
|
29
|
+
occurrences = Hash.new(0)
|
30
|
+
array.each { |column| occurrences[column] += 1 }
|
31
|
+
occurrences
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns array with duplicated objects
|
35
|
+
# @param [Array<Object>] the array to find duplicates in
|
36
|
+
# @return [Array<Object>] array of duplicated objects
|
37
|
+
def self.duplicated_items(array)
|
38
|
+
array.select { |col| array.count(col) > 1 }.uniq
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'honey_format/helpers/helpers'
|
2
4
|
|
3
5
|
module HoneyFormat
|
4
6
|
# Represents a header
|
@@ -8,28 +10,36 @@ module HoneyFormat
|
|
8
10
|
# Instantiate a Header
|
9
11
|
# @return [Header] a new instance of Header.
|
10
12
|
# @param [Array<String>] header array of strings.
|
11
|
-
# @param converter [#call, Symbol]
|
13
|
+
# @param converter [#call, Symbol]
|
14
|
+
# header converter that implements a #call method
|
15
|
+
# that takes one column (string) argument OR symbol for a registered
|
16
|
+
# converter registry.
|
17
|
+
# @param deduplicator [#call, Symbol]
|
18
|
+
# header deduplicator that implements a #call method
|
19
|
+
# that takes columns Array<String> argument OR symbol for a registered
|
20
|
+
# deduplicator registry.
|
12
21
|
# @raise [HeaderError] super class of errors raised when there is a CSV header error.
|
13
22
|
# @raise [MissingHeaderColumnError] raised when header is missing
|
14
23
|
# @example Instantiate a header with a custom converter
|
15
24
|
# converter = ->(col) { col == 'username' ? 'handle' : col }
|
16
25
|
# header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
|
17
26
|
# header.to_a # => ['name', 'handle']
|
18
|
-
def initialize(
|
27
|
+
def initialize(
|
28
|
+
header,
|
29
|
+
converter: HoneyFormat.header_converter,
|
30
|
+
deduplicator: HoneyFormat.config.header_deduplicator
|
31
|
+
)
|
19
32
|
if header.nil? || header.empty?
|
20
33
|
raise(Errors::MissingHeaderError, "CSV header can't be empty.")
|
21
34
|
end
|
22
35
|
|
23
36
|
@original_header = header
|
24
|
-
|
25
|
-
|
26
|
-
else
|
27
|
-
converter
|
28
|
-
end
|
29
|
-
|
37
|
+
self.deduplicator = deduplicator
|
38
|
+
self.converter = converter
|
30
39
|
@columns = build_columns(@original_header)
|
31
40
|
end
|
32
41
|
|
42
|
+
# Returns the original header
|
33
43
|
# @return [Array<String>] the original header
|
34
44
|
def original
|
35
45
|
@original_header
|
@@ -82,15 +92,41 @@ module HoneyFormat
|
|
82
92
|
|
83
93
|
private
|
84
94
|
|
95
|
+
# Set the header converter
|
96
|
+
# @param [Symbol, #call] symbol to known converter or object that responds to #call
|
97
|
+
# @return [nil]
|
98
|
+
def converter=(object)
|
99
|
+
if object.is_a?(Symbol)
|
100
|
+
@converter = HoneyFormat.converter_registry[object]
|
101
|
+
return
|
102
|
+
end
|
103
|
+
|
104
|
+
@converter = object
|
105
|
+
end
|
106
|
+
|
107
|
+
# Set the header deduplicator
|
108
|
+
# @param [Symbol, #call] symbol to known deduplicator or object that responds to #call
|
109
|
+
# @return [nil]
|
110
|
+
def deduplicator=(object)
|
111
|
+
if object.is_a?(Symbol)
|
112
|
+
@deduplicator = HoneyFormat.header_deduplicator_registry[object]
|
113
|
+
return
|
114
|
+
end
|
115
|
+
|
116
|
+
@deduplicator = object
|
117
|
+
end
|
118
|
+
|
85
119
|
# Convert original header
|
86
120
|
# @param [Array<String>] header the original header
|
87
121
|
# @return [Array<String>] converted columns
|
88
122
|
def build_columns(header)
|
89
|
-
header.each_with_index.map do |header_column, index|
|
123
|
+
columns = header.each_with_index.map do |header_column, index|
|
90
124
|
convert_column(header_column, index).tap do |column|
|
91
125
|
maybe_raise_missing_column!(column)
|
92
126
|
end
|
93
127
|
end
|
128
|
+
|
129
|
+
@deduplicator.call(columns)
|
94
130
|
end
|
95
131
|
|
96
132
|
# Convert the column value
|
@@ -122,8 +158,8 @@ module HoneyFormat
|
|
122
158
|
|
123
159
|
parts = [
|
124
160
|
"CSV header column can't be nil or empty!",
|
125
|
-
|
126
|
-
'Instead generate unique columns names.'
|
161
|
+
'When you pass your own converter make sure that it never returns nil or an empty string.', # rubocop:disable Metrics/LineLength
|
162
|
+
'Instead generate unique columns names.',
|
127
163
|
]
|
128
164
|
raise(Errors::MissingHeaderColumnError, parts.join(' '))
|
129
165
|
end
|