honey_format 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +3 -0
- data/.rubocop.yml +7 -0
- data/.ruby-style-guide.yml +264 -0
- data/CHANGELOG.md +15 -0
- data/Gemfile +2 -0
- data/README.md +63 -15
- data/Rakefile +2 -0
- data/bin/benchmark +2 -0
- data/bin/console +1 -0
- data/exe/honey_format +1 -0
- data/honey_format.gemspec +5 -4
- data/lib/honey_format/cli/benchmark_cli.rb +15 -13
- data/lib/honey_format/cli/cli.rb +17 -12
- data/lib/honey_format/cli/result_writer.rb +2 -0
- data/lib/honey_format/configuration.rb +114 -11
- data/lib/honey_format/converters/convert_boolean.rb +24 -0
- data/lib/honey_format/converters/convert_date_and_time.rb +30 -0
- data/lib/honey_format/converters/convert_number.rb +33 -0
- data/lib/honey_format/converters/convert_string.rb +42 -0
- data/lib/honey_format/converters/converters.rb +12 -0
- data/lib/honey_format/converters/header_column_converter.rb +57 -0
- data/lib/honey_format/csv.rb +22 -14
- data/lib/honey_format/errors.rb +8 -2
- data/lib/honey_format/helpers/helpers.rb +41 -0
- data/lib/honey_format/{header.rb → matrix/header.rb} +48 -12
- data/lib/honey_format/matrix/matrix.rb +104 -0
- data/lib/honey_format/{row.rb → matrix/row.rb} +6 -3
- data/lib/honey_format/{row_builder.rb → matrix/row_builder.rb} +5 -4
- data/lib/honey_format/{rows.rb → matrix/rows.rb} +7 -4
- data/lib/honey_format/matrix.rb +6 -89
- data/lib/honey_format/registry.rb +99 -0
- data/lib/honey_format/version.rb +4 -2
- data/lib/honey_format.rb +14 -6
- metadata +34 -24
- data/lib/honey_format/header_column_converter.rb +0 -40
- data/lib/honey_format/value_converter.rb +0 -117
data/lib/honey_format/cli/cli.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'optparse'
|
2
4
|
|
3
5
|
module HoneyFormat
|
@@ -15,8 +17,11 @@ module HoneyFormat
|
|
15
17
|
|
16
18
|
private
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
+
# Puts to configured IO
|
21
|
+
# @param [#to_s] object to print
|
22
|
+
# @return [nil] nil
|
23
|
+
def puts(arg)
|
24
|
+
@io.puts(arg)
|
20
25
|
end
|
21
26
|
|
22
27
|
# Parse command line arguments and return options
|
@@ -33,26 +38,26 @@ module HoneyFormat
|
|
33
38
|
type_map = {}
|
34
39
|
|
35
40
|
OptionParser.new do |parser|
|
36
|
-
parser.banner =
|
41
|
+
parser.banner = 'Usage: honey_format [options] <file.csv>'
|
37
42
|
parser.default_argv = argv
|
38
43
|
|
39
|
-
parser.on(
|
44
|
+
parser.on('--csv=input.csv', String, 'CSV file') do |value|
|
40
45
|
input_path = value
|
41
46
|
end
|
42
47
|
|
43
|
-
parser.on(
|
48
|
+
parser.on('--columns=id,name', Array, 'Select columns') do |value|
|
44
49
|
columns = value&.map(&:to_sym)
|
45
50
|
end
|
46
51
|
|
47
|
-
parser.on(
|
52
|
+
parser.on('--output=output.csv', String, 'CSV output (STDOUT otherwise)') do |value|
|
48
53
|
output_path = value
|
49
54
|
end
|
50
55
|
|
51
|
-
parser.on(
|
56
|
+
parser.on('--delimiter=,', String, 'CSV delimiter (default: ,)') do |value|
|
52
57
|
delimiter = value
|
53
58
|
end
|
54
59
|
|
55
|
-
parser.on(
|
60
|
+
parser.on('--skip-lines=,', String, 'Skip lines that match this pattern') do |value|
|
56
61
|
skip_lines = value
|
57
62
|
end
|
58
63
|
|
@@ -60,15 +65,15 @@ module HoneyFormat
|
|
60
65
|
type_map = option_to_h(value || [])
|
61
66
|
end
|
62
67
|
|
63
|
-
parser.on(
|
68
|
+
parser.on('--[no-]header-only', 'Print only the header') do |value|
|
64
69
|
header_only = value
|
65
70
|
end
|
66
71
|
|
67
|
-
parser.on(
|
72
|
+
parser.on('--[no-]rows-only', 'Print only the rows') do |value|
|
68
73
|
rows_only = value
|
69
74
|
end
|
70
75
|
|
71
|
-
parser.on(
|
76
|
+
parser.on('-h', '--help', 'How to use') do
|
72
77
|
puts parser
|
73
78
|
exit
|
74
79
|
end
|
@@ -96,7 +101,7 @@ module HoneyFormat
|
|
96
101
|
header_only: header_only,
|
97
102
|
rows_only: rows_only,
|
98
103
|
skip_lines: skip_lines,
|
99
|
-
type_map: type_map
|
104
|
+
type_map: type_map,
|
100
105
|
}
|
101
106
|
end
|
102
107
|
|
@@ -1,26 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'honey_format/helpers/helpers'
|
4
|
+
|
1
5
|
module HoneyFormat
|
2
6
|
# Holds HoneyFormat configuration
|
3
|
-
# @
|
4
|
-
# @
|
5
|
-
# @
|
6
|
-
# @
|
7
|
+
# @attr_accessor [String] delimiter the default column delimiter (default: ,)
|
8
|
+
# @attr_accessor [String, Symbol] row_delimiter the default row delimiter (default: :auto)
|
9
|
+
# @attr_accessor [String] quote_character the default quote character (default: ")
|
10
|
+
# @attr_accessor [String, Regexp] skip_lines skip all lines matching pattern (default: nil)
|
7
11
|
class Configuration
|
8
|
-
attr_accessor :
|
12
|
+
attr_accessor :delimiter, :row_delimiter, :quote_character, :skip_lines
|
9
13
|
|
10
14
|
# Instantiate configuration
|
11
15
|
def initialize
|
12
|
-
@
|
13
|
-
@header_converter =
|
16
|
+
@converter_registry = nil
|
17
|
+
@header_converter = nil
|
18
|
+
@header_deduplicator = nil
|
19
|
+
@delimiter = ','
|
20
|
+
@row_delimiter = :auto
|
21
|
+
@quote_character = '"'
|
22
|
+
@skip_lines = nil
|
23
|
+
end
|
24
|
+
|
25
|
+
# Returns the header converter
|
26
|
+
# @return [#call] header_converter the configured header converter
|
27
|
+
def header_converter
|
28
|
+
@header_converter ||= converter_registry[:header_column]
|
14
29
|
end
|
15
30
|
|
16
31
|
# Set the header converter
|
17
|
-
# @param [Symbol, #call] converter for registered
|
32
|
+
# @param [Symbol, #call] converter for registered converter registry or object that
|
33
|
+
# responds to #call
|
18
34
|
# @return [#call] the header converter
|
19
35
|
def header_converter=(converter)
|
20
|
-
if converter.is_a?(Symbol)
|
21
|
-
|
36
|
+
@header_converter = if converter.is_a?(Symbol)
|
37
|
+
converter_registry[converter]
|
38
|
+
else
|
39
|
+
converter
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Return the deduplication header strategy
|
44
|
+
# @return [#call] the header deduplication strategy
|
45
|
+
def header_deduplicator
|
46
|
+
@header_deduplicator ||= header_deduplicator_registry[:deduplicate]
|
47
|
+
end
|
48
|
+
|
49
|
+
# Set the deduplication header strategy
|
50
|
+
# @param [Symbol, #call]
|
51
|
+
# symbol with known strategy identifier or method that responds
|
52
|
+
# to #call(columns)
|
53
|
+
# @return [#call] the header deduplication strategy
|
54
|
+
# @raise [UnknownDeduplicationStrategyError]
|
55
|
+
def header_deduplicator=(strategy)
|
56
|
+
if header_deduplicator_registry.type?(strategy)
|
57
|
+
@header_deduplicator = header_deduplicator_registry[strategy]
|
58
|
+
elsif strategy.respond_to?(:call)
|
59
|
+
@header_deduplicator = strategy
|
60
|
+
else
|
61
|
+
message = "unknown deduplication strategy: '#{strategy}'"
|
62
|
+
raise(Errors::UnknownDeduplicationStrategyError, message)
|
22
63
|
end
|
23
|
-
|
64
|
+
end
|
65
|
+
|
66
|
+
# Default header deduplicate strategies
|
67
|
+
# @return [Hash] the default header deduplication strategies
|
68
|
+
def default_header_deduplicators
|
69
|
+
@default_header_deduplicators ||= {
|
70
|
+
deduplicate: proc do |columns|
|
71
|
+
Helpers.key_count_to_deduplicated_array(columns)
|
72
|
+
end,
|
73
|
+
raise: proc do |columns|
|
74
|
+
duplicates = Helpers.duplicated_items(columns)
|
75
|
+
if duplicates.any?
|
76
|
+
message = "all columns must be unique, duplicates are: #{duplicates}"
|
77
|
+
raise(Errors::DuplicateHeaderColumnError, message)
|
78
|
+
end
|
79
|
+
columns
|
80
|
+
end,
|
81
|
+
none: proc { |columns| columns },
|
82
|
+
}.freeze
|
83
|
+
end
|
84
|
+
|
85
|
+
# Returns the column deduplication registry
|
86
|
+
# @return [#call] column deduplication registry
|
87
|
+
def header_deduplicator_registry
|
88
|
+
@header_deduplicator_registry ||= Registry.new(default_header_deduplicators)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Returns the converter registry
|
92
|
+
# @return [#call] converter the configured converter registry
|
93
|
+
def converter_registry
|
94
|
+
@converter_registry ||= Registry.new(default_converters)
|
95
|
+
end
|
96
|
+
|
97
|
+
# Default converter registry
|
98
|
+
# @return [Hash] hash with default converters
|
99
|
+
def default_converters
|
100
|
+
@default_converters ||= {
|
101
|
+
# strict variants
|
102
|
+
decimal!: StrictConvertDecimal,
|
103
|
+
integer!: StrictConvertInteger,
|
104
|
+
date!: StrictConvertDate,
|
105
|
+
datetime!: StrictConvertDatetime,
|
106
|
+
symbol!: StrictConvertSymbol,
|
107
|
+
downcase!: StrictConvertDowncase,
|
108
|
+
upcase!: StrictConvertUpcase,
|
109
|
+
boolean!: StrictConvertBoolean,
|
110
|
+
# safe variants
|
111
|
+
decimal: ConvertDecimal,
|
112
|
+
decimal_or_zero: ConvertDecimalOrZero,
|
113
|
+
integer: ConvertInteger,
|
114
|
+
integer_or_zero: ConvertIntegerOrZero,
|
115
|
+
date: ConvertDate,
|
116
|
+
datetime: ConvertDatetime,
|
117
|
+
symbol: ConvertSymbol,
|
118
|
+
downcase: ConvertDowncase,
|
119
|
+
upcase: ConvertUpcase,
|
120
|
+
boolean: ConvertBoolean,
|
121
|
+
md5: ConvertMD5,
|
122
|
+
hex: ConvertHex,
|
123
|
+
nil: ConvertNil,
|
124
|
+
blank: ConvertBlank,
|
125
|
+
header_column: ConvertHeaderColumn,
|
126
|
+
}.freeze
|
24
127
|
end
|
25
128
|
end
|
26
129
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
# String values considered truthy
|
5
|
+
TRUTHY = Set.new(%w[t T 1 y Y true TRUE]).freeze
|
6
|
+
# String values considered falsy
|
7
|
+
FALSY = Set.new(%w[f F 0 n N false FALSE]).freeze
|
8
|
+
|
9
|
+
# Tries to convert value to boolean, returns nil if it can't convert
|
10
|
+
ConvertBoolean = proc do |v|
|
11
|
+
if TRUTHY.include?(v)
|
12
|
+
true
|
13
|
+
elsif FALSY.include?(v)
|
14
|
+
false
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Convert to boolean or raise error
|
19
|
+
StrictConvertBoolean = proc do |v|
|
20
|
+
ConvertBoolean.call(v).tap do |value|
|
21
|
+
raise(ArgumentError, "can't convert #{v} to boolean") if value.nil?
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
require 'time'
|
5
|
+
|
6
|
+
module HoneyFormat
|
7
|
+
# Convert to date
|
8
|
+
ConvertDate = proc do |v|
|
9
|
+
begin
|
10
|
+
StrictConvertDate.call(v)
|
11
|
+
rescue ArgumentError, TypeError
|
12
|
+
nil
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
# Convert to datetime
|
17
|
+
ConvertDatetime = proc do |v|
|
18
|
+
begin
|
19
|
+
StrictConvertDatetime.call(v)
|
20
|
+
rescue ArgumentError, TypeError
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convert to date or raise error
|
26
|
+
StrictConvertDate = proc { |v| Date.parse(v) }
|
27
|
+
|
28
|
+
# Convert to datetime or raise error
|
29
|
+
StrictConvertDatetime = proc { |v| Time.parse(v) }
|
30
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
# Converts decimal or nil
|
5
|
+
ConvertDecimal = proc do |v|
|
6
|
+
begin
|
7
|
+
Float(v)
|
8
|
+
rescue ArgumentError, TypeError
|
9
|
+
nil
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# Converts to decimal or zero
|
14
|
+
ConvertDecimalOrZero = proc { |v| v.to_f }
|
15
|
+
|
16
|
+
# Convert to integer or nil
|
17
|
+
ConvertInteger = proc do |v|
|
18
|
+
begin
|
19
|
+
Integer(v)
|
20
|
+
rescue ArgumentError, TypeError
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convert to integer or zero
|
26
|
+
ConvertIntegerOrZero = proc { |v| v.to_i }
|
27
|
+
|
28
|
+
# Convert to decimal or raise error
|
29
|
+
StrictConvertDecimal = proc { |v| Float(v) }
|
30
|
+
|
31
|
+
# Convert to integer or raise error
|
32
|
+
StrictConvertInteger = proc { |v| Integer(v) }
|
33
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'digest'
|
4
|
+
require 'securerandom'
|
5
|
+
|
6
|
+
module HoneyFormat
|
7
|
+
# Convert to downcase or nil
|
8
|
+
ConvertDowncase = proc { |v| v&.downcase }
|
9
|
+
|
10
|
+
# Convert to upcase or nil
|
11
|
+
ConvertUpcase = proc { |v| v&.upcase }
|
12
|
+
|
13
|
+
# Convert to symbol or nil
|
14
|
+
ConvertSymbol = proc { |v| v&.to_sym }
|
15
|
+
|
16
|
+
# Convert to md5 or nil
|
17
|
+
ConvertMD5 = proc { |v| Digest::MD5.hexdigest(v) if v }
|
18
|
+
|
19
|
+
# Replace value with a random hex string (SecureRandom.hex), or nil if value is nil
|
20
|
+
ConvertHex = proc { |v| SecureRandom.hex if v }
|
21
|
+
|
22
|
+
# Convert to blank string
|
23
|
+
ConvertBlank = proc { '' }
|
24
|
+
|
25
|
+
# Convert header column
|
26
|
+
ConvertHeaderColumn = HeaderColumnConverter
|
27
|
+
|
28
|
+
# Convert to upcase or raise error
|
29
|
+
StrictConvertUpcase = proc do |v|
|
30
|
+
ConvertUpcase.call(v) || raise(ArgumentError, "can't convert nil to upcased string")
|
31
|
+
end
|
32
|
+
|
33
|
+
# Convert to downcase or raise error
|
34
|
+
StrictConvertDowncase = proc do |v|
|
35
|
+
ConvertDowncase.call(v) || raise(ArgumentError, "can't convert nil to downcased string")
|
36
|
+
end
|
37
|
+
|
38
|
+
# Convert to symbol or raise error
|
39
|
+
StrictConvertSymbol = proc do |v|
|
40
|
+
ConvertSymbol.call(v) || raise(ArgumentError, "can't convert nil to symbol")
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'honey_format/converters/header_column_converter'
|
4
|
+
require 'honey_format/converters/convert_boolean'
|
5
|
+
require 'honey_format/converters/convert_date_and_time'
|
6
|
+
require 'honey_format/converters/convert_number'
|
7
|
+
require 'honey_format/converters/convert_string'
|
8
|
+
|
9
|
+
module HoneyFormat
|
10
|
+
# Convert to nil
|
11
|
+
ConvertNil = proc {}
|
12
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
# Header column converter
|
5
|
+
module HeaderColumnConverter
|
6
|
+
# Bracket character matcher
|
7
|
+
BRACKETS = /\(|\[|\{|\)|\]|\}/
|
8
|
+
|
9
|
+
# Separator characters
|
10
|
+
SEPS = /'|"|\||\*|\^|\&|%|\$|€|#/
|
11
|
+
|
12
|
+
# Replace map
|
13
|
+
REPLACE_MAP = [
|
14
|
+
[/\\/, '/'], # replace "\" with "/"
|
15
|
+
[/ \(/, '('], # replace " (" with "("
|
16
|
+
[/ \[/, '['], # replace " [" with "["
|
17
|
+
[/ \{/, '{'], # replace " {" with "{"
|
18
|
+
[/ \{/, '{'], # replace " {" with "{"
|
19
|
+
[/\) /, ')'], # replace ") " with ")"
|
20
|
+
[/\] /, ']'], # replace "] " with "]"
|
21
|
+
[/\} /, '}'], # replace "} " with "}"
|
22
|
+
[BRACKETS, '_'], # replace (, [, {, ), ] and } with "_"
|
23
|
+
[/ +/, '_'], # replace one or more spaces with "_"
|
24
|
+
[/-/, '_'], # replace "-" with "_"
|
25
|
+
[/::/, '_'], # replace "::" with "_"
|
26
|
+
[%r{/}, '_'], # replace "/" with "_"
|
27
|
+
[SEPS, '_'], # replace separator chars with "_"
|
28
|
+
[/_+/, '_'], # replace one or more "_" with single "_"
|
29
|
+
[/\A_+/, ''], # remove leading "_"
|
30
|
+
[/_+\z/, ''], # remove trailing "_"
|
31
|
+
].map(&:freeze).freeze
|
32
|
+
|
33
|
+
# Returns converted value; the argument is duplicated first and is not mutated.
|
34
|
+
# @return [Symbol] the cleaned header column.
|
35
|
+
# @param [String] column the string to be cleaned.
|
36
|
+
# @param [Integer] index the column index.
|
37
|
+
# @example Convert simple header
|
38
|
+
# HeaderColumnConverter.call(" User name ") #=> :user_name
|
39
|
+
# @example Convert complex header
|
40
|
+
# HeaderColumnConverter.call(" First name (user)") #=> :first_name_user
|
41
|
+
def self.call(column, index = nil)
|
42
|
+
if column.nil? || column.empty?
|
43
|
+
raise(ArgumentError, "column and column index can't be blank/nil") unless index
|
44
|
+
return :"column#{index}"
|
45
|
+
end
|
46
|
+
|
47
|
+
column = column.dup
|
48
|
+
column.strip!
|
49
|
+
column.downcase!
|
50
|
+
REPLACE_MAP.each do |data|
|
51
|
+
from, to = data
|
52
|
+
column.gsub!(from, to)
|
53
|
+
end
|
54
|
+
column.to_sym
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/honey_format/csv.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'csv'
|
3
4
|
require 'honey_format/matrix'
|
4
|
-
# require 'honey_format/rows'
|
5
|
-
# require 'honey_format/header'
|
6
5
|
|
7
6
|
module HoneyFormat
|
8
7
|
# Represents CSV.
|
@@ -10,14 +9,18 @@ module HoneyFormat
|
|
10
9
|
# Instantiate CSV.
|
11
10
|
# @return [CSV] a new instance of CSV.
|
12
11
|
# @param [String] csv the CSV string
|
13
|
-
# @param [String]
|
14
|
-
# @param [String, Symbol]
|
15
|
-
# @param [String]
|
16
|
-
# @param [Array<String>]
|
17
|
-
#
|
18
|
-
#
|
12
|
+
# @param delimiter [String] the CSV column delimiter
|
13
|
+
# @param row_delimiter [String, Symbol] the CSV row delimiter (default: :auto)
|
14
|
+
# @param quote_character [String] the CSV quote character (default: ")
|
15
|
+
# @param header [Array<String>]
|
16
|
+
# header optional argument that represents CSV header, required if the CSV file
|
17
|
+
# lacks a header row.
|
18
|
+
# @param header_converter [#call] converts header columns.
|
19
|
+
# @param header_deduplicator [#call] deduplicates header columns.
|
20
|
+
# @param row_builder [#call] will be called for each parsed row.
|
19
21
|
# @param type_map [Hash] map of column_name => type conversion to perform.
|
20
|
-
# @param skip_lines [Regexp, String]
|
22
|
+
# @param skip_lines [Regexp, String]
|
23
|
+
# Regexp for determining whether a line is a comment. See CSV skip_lines option.
|
21
24
|
# @raise [HeaderError] super class of errors raised when there is a CSV header error.
|
22
25
|
# @raise [MissingHeaderError] raised when header is missing (empty or nil).
|
23
26
|
# @raise [MissingHeaderColumnError] raised when header column is missing.
|
@@ -40,17 +43,21 @@ module HoneyFormat
|
|
40
43
|
# rescue HoneyFormat::RowError => e
|
41
44
|
# puts "row error: #{e.class}, #{e.message}"
|
42
45
|
# end
|
46
|
+
# @example Skip all lines starting with '#'
|
47
|
+
# csv = HoneyFormat::CSV.new("name,id\n# some comment\njacob,1", skip_lines: '#')
|
48
|
+
# csv.rows.length # => 1
|
43
49
|
# @see Matrix#new
|
44
50
|
def initialize(
|
45
51
|
csv,
|
46
|
-
delimiter:
|
47
|
-
row_delimiter:
|
48
|
-
quote_character:
|
52
|
+
delimiter: HoneyFormat.config.delimiter,
|
53
|
+
row_delimiter: HoneyFormat.config.row_delimiter,
|
54
|
+
quote_character: HoneyFormat.config.quote_character,
|
49
55
|
header: nil,
|
50
56
|
header_converter: HoneyFormat.header_converter,
|
57
|
+
header_deduplicator: HoneyFormat.config.header_deduplicator,
|
51
58
|
row_builder: nil,
|
52
59
|
type_map: {},
|
53
|
-
skip_lines:
|
60
|
+
skip_lines: HoneyFormat.config.skip_lines
|
54
61
|
)
|
55
62
|
csv = ::CSV.parse(
|
56
63
|
csv,
|
@@ -64,6 +71,7 @@ module HoneyFormat
|
|
64
71
|
csv,
|
65
72
|
header: header,
|
66
73
|
header_converter: header_converter,
|
74
|
+
header_deduplicator: header_deduplicator,
|
67
75
|
row_builder: row_builder,
|
68
76
|
type_map: type_map
|
69
77
|
)
|
data/lib/honey_format/errors.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module HoneyFormat
|
2
4
|
# Errors
|
3
5
|
module Errors
|
@@ -8,6 +10,10 @@ module HoneyFormat
|
|
8
10
|
class MissingHeaderError < HeaderError; end
|
9
11
|
# Raised when header column is missing
|
10
12
|
class MissingHeaderColumnError < HeaderError; end
|
13
|
+
# Raised when header column duplicate is found
|
14
|
+
class DuplicateHeaderColumnError < HeaderError; end
|
15
|
+
# Raised when deduplication strategy is unknown
|
16
|
+
class UnknownDeduplicationStrategyError < HeaderError; end
|
11
17
|
|
12
18
|
# Row errors
|
13
19
|
# Super class of errors raised when there is a row error
|
@@ -19,9 +25,9 @@ module HoneyFormat
|
|
19
25
|
|
20
26
|
# Value conversion errors
|
21
27
|
# Raised when value type is unknown
|
22
|
-
class
|
28
|
+
class UnknownTypeError < ArgumentError; end
|
23
29
|
# Raised when value type already exists
|
24
|
-
class
|
30
|
+
class TypeExistsError < ArgumentError; end
|
25
31
|
end
|
26
32
|
|
27
33
|
include Errors
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HoneyFormat
|
4
|
+
module Helpers
|
5
|
+
# Converts an array of keys (possibly containing duplicates) to a deduplicated array.
|
6
|
+
# @param [Array<Symbol>] data the keys to deduplicate
|
7
|
+
# @return [Array<Symbol>] an array of symbols
|
8
|
+
# @example
|
9
|
+
# Helpers.key_count_to_deduplicated_array([:a, :a, :b])
|
10
|
+
# # => [:a, :a1, :b]
|
11
|
+
def self.key_count_to_deduplicated_array(data)
|
12
|
+
array = []
|
13
|
+
count_occurences(data).each do |key, value|
|
14
|
+
next array << key if value == 1
|
15
|
+
|
16
|
+
values = Array.new(value) { |i| i }.map do |index|
|
17
|
+
next key if index.zero?
|
18
|
+
:"#{key}#{index}"
|
19
|
+
end
|
20
|
+
array.concat(values)
|
21
|
+
end
|
22
|
+
array
|
23
|
+
end
|
24
|
+
|
25
|
+
# Returns hash with key => occurrences_count
|
26
|
+
# @param [Array<Object>] array the array to count occurrences in
|
27
|
+
# @return [Hash] key => occurrences_count
|
28
|
+
def self.count_occurences(array)
|
29
|
+
occurrences = Hash.new(0)
|
30
|
+
array.each { |column| occurrences[column] += 1 }
|
31
|
+
occurrences
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns array with duplicated objects
|
35
|
+
# @param [Array<Object>] array the array to find duplicates in
|
36
|
+
# @return [Array<Object>] array of duplicated objects
|
37
|
+
def self.duplicated_items(array)
|
38
|
+
array.select { |col| array.count(col) > 1 }.uniq
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'honey_format/helpers/helpers'
|
2
4
|
|
3
5
|
module HoneyFormat
|
4
6
|
# Represents a header
|
@@ -8,28 +10,36 @@ module HoneyFormat
|
|
8
10
|
# Instantiate a Header
|
9
11
|
# @return [Header] a new instance of Header.
|
10
12
|
# @param [Array<String>] header array of strings.
|
11
|
-
# @param converter [#call, Symbol]
|
13
|
+
# @param converter [#call, Symbol]
|
14
|
+
# header converter that implements a #call method
|
15
|
+
# that takes one column (string) argument OR symbol for a registered
|
16
|
+
# converter registry.
|
17
|
+
# @param deduplicator [#call, Symbol]
|
18
|
+
# header deduplicator that implements a #call method
|
19
|
+
# that takes columns Array<String> argument OR symbol for a registered
|
20
|
+
# deduplicator registry.
|
12
21
|
# @raise [HeaderError] super class of errors raised when there is a CSV header error.
|
13
22
|
# @raise [MissingHeaderColumnError] raised when header is missing
|
14
23
|
# @example Instantiate a header with a custom converter
|
15
24
|
# converter = ->(col) { col == 'username' ? 'handle' : col }
|
16
25
|
# header = HoneyFormat::Header.new(['name', 'username'], converter: converter)
|
17
26
|
# header.to_a # => ['name', 'handle']
|
18
|
-
def initialize(
|
27
|
+
def initialize(
|
28
|
+
header,
|
29
|
+
converter: HoneyFormat.header_converter,
|
30
|
+
deduplicator: HoneyFormat.config.header_deduplicator
|
31
|
+
)
|
19
32
|
if header.nil? || header.empty?
|
20
33
|
raise(Errors::MissingHeaderError, "CSV header can't be empty.")
|
21
34
|
end
|
22
35
|
|
23
36
|
@original_header = header
|
24
|
-
|
25
|
-
|
26
|
-
else
|
27
|
-
converter
|
28
|
-
end
|
29
|
-
|
37
|
+
self.deduplicator = deduplicator
|
38
|
+
self.converter = converter
|
30
39
|
@columns = build_columns(@original_header)
|
31
40
|
end
|
32
41
|
|
42
|
+
# Returns the original header
|
33
43
|
# @return [Array<String>] the original header
|
34
44
|
def original
|
35
45
|
@original_header
|
@@ -82,15 +92,41 @@ module HoneyFormat
|
|
82
92
|
|
83
93
|
private
|
84
94
|
|
95
|
+
# Set the header converter
|
96
|
+
# @param [Symbol, #call] symbol to known converter or object that responds to #call
|
97
|
+
# @return [nil]
|
98
|
+
def converter=(object)
|
99
|
+
if object.is_a?(Symbol)
|
100
|
+
@converter = HoneyFormat.converter_registry[object]
|
101
|
+
return
|
102
|
+
end
|
103
|
+
|
104
|
+
@converter = object
|
105
|
+
end
|
106
|
+
|
107
|
+
# Set the header deduplicator
|
108
|
+
# @param [Symbol, #call] symbol to known deduplicator or object that responds to #call
|
109
|
+
# @return [nil]
|
110
|
+
def deduplicator=(object)
|
111
|
+
if object.is_a?(Symbol)
|
112
|
+
@deduplicator = HoneyFormat.header_deduplicator_registry[object]
|
113
|
+
return
|
114
|
+
end
|
115
|
+
|
116
|
+
@deduplicator = object
|
117
|
+
end
|
118
|
+
|
85
119
|
# Convert original header
|
86
120
|
# @param [Array<String>] header the original header
|
87
121
|
# @return [Array<String>] converted columns
|
88
122
|
def build_columns(header)
|
89
|
-
header.each_with_index.map do |header_column, index|
|
123
|
+
columns = header.each_with_index.map do |header_column, index|
|
90
124
|
convert_column(header_column, index).tap do |column|
|
91
125
|
maybe_raise_missing_column!(column)
|
92
126
|
end
|
93
127
|
end
|
128
|
+
|
129
|
+
@deduplicator.call(columns)
|
94
130
|
end
|
95
131
|
|
96
132
|
# Convert the column value
|
@@ -122,8 +158,8 @@ module HoneyFormat
|
|
122
158
|
|
123
159
|
parts = [
|
124
160
|
"CSV header column can't be nil or empty!",
|
125
|
-
|
126
|
-
'Instead generate unique columns names.'
|
161
|
+
'When you pass your own converter make sure that it never returns nil or an empty string.', # rubocop:disable Metrics/LineLength
|
162
|
+
'Instead generate unique columns names.',
|
127
163
|
]
|
128
164
|
raise(Errors::MissingHeaderColumnError, parts.join(' '))
|
129
165
|
end
|