honey_format 0.17.0 → 0.21.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +27 -6
- data/CHANGELOG.md +37 -0
- data/README.md +41 -12
- data/exe/honey_format +7 -1
- data/honey_format.gemspec +2 -2
- data/lib/honey_format/cli/cli.rb +8 -0
- data/lib/honey_format/configuration.rb +1 -27
- data/lib/honey_format/converters/convert_boolean.rb +4 -2
- data/lib/honey_format/converters/convert_date_and_time.rb +2 -2
- data/lib/honey_format/converters/converters.rb +31 -0
- data/lib/honey_format/converters/header_column_converter.rb +36 -22
- data/lib/honey_format/csv.rb +6 -1
- data/lib/honey_format/helpers/helpers.rb +1 -0
- data/lib/honey_format/matrix/header.rb +41 -14
- data/lib/honey_format/matrix/row.rb +1 -1
- data/lib/honey_format/matrix/row_builder.rb +2 -0
- data/lib/honey_format/matrix/rows.rb +7 -0
- data/lib/honey_format/registry.rb +4 -1
- data/lib/honey_format/version.rb +2 -2
- metadata +13 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd0d06f453986109809c196cef032b5f60233e165a8d5e029162b26b6cc801b2
|
4
|
+
data.tar.gz: 8f5a15f42e9f2587bc80b7e88d5c929d74a04ef69bd6be272513ba8fc544a22a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 563e6ba6a8ede49e13d15957c12c247b6e26c981afe25b150d6ae8c6c3dab58929c100341dcc0181790a03eabffca5ddd3ac127cf97fcdb4ac76ef04b01c9839
|
7
|
+
data.tar.gz: 93a08cb9b2683961e80f8f2bb79f763f05fb1401e112458a6188bf86530233e93f9d397b0ee415e4ce9c8b95ff644485e50dc6b52a2c8381aa4379b0bce0e65a
|
data/.travis.yml
CHANGED
@@ -1,7 +1,28 @@
|
|
1
1
|
language: ruby
|
2
|
-
|
3
|
-
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
script:
|
3
|
+
- bundle exec rspec
|
4
|
+
matrix:
|
5
|
+
fast_finish: true
|
6
|
+
allow_failures:
|
7
|
+
- name: TruffleRuby
|
8
|
+
rvm: system
|
9
|
+
include:
|
10
|
+
- rvm: 2.3.0
|
11
|
+
install:
|
12
|
+
- gem install bundler --no-document
|
13
|
+
- bundle install
|
14
|
+
- rvm: 2.5.1
|
15
|
+
install:
|
16
|
+
- gem install bundler --no-document
|
17
|
+
- bundle install
|
18
|
+
- rvm: 2.6.0-preview3
|
19
|
+
install:
|
20
|
+
- bundle install
|
21
|
+
- name: TruffleRuby
|
22
|
+
rvm: system
|
23
|
+
install:
|
24
|
+
- export TRUFFLERUBY_VERSION=1.0.0-rc10
|
25
|
+
- curl -L https://github.com/oracle/truffleruby/releases/download/vm-$TRUFFLERUBY_VERSION/truffleruby-$TRUFFLERUBY_VERSION-linux-amd64.tar.gz | tar xz
|
26
|
+
- export PATH="$PWD/truffleruby-$TRUFFLERUBY_VERSION-linux-amd64/bin:$PATH"
|
27
|
+
- gem install bundler -v 1.16.6 --no-ri --no-rdoc
|
28
|
+
- bundle install
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,42 @@
|
|
1
1
|
# HEAD
|
2
2
|
|
3
|
+
## v0.21.1
|
4
|
+
|
5
|
+
* Closes [issue #58](https://github.com/buren/honey_format/issues/58). [PR #62](https://github.com/buren/honey_format/pull/62)
|
6
|
+
|
7
|
+
## v0.21.0
|
8
|
+
|
9
|
+
* Add `Rows#[]` method
|
10
|
+
|
11
|
+
## v0.20.0
|
12
|
+
|
13
|
+
* Support additional header variant, pass hash with `String => Symbol` and/or `String => #call` (callable object). Unmapped keys are converted using the default header converter.
|
14
|
+
```ruby
|
15
|
+
converter = {
|
16
|
+
'First name' => :first_name,
|
17
|
+
'Last name' => -> { :surname }
|
18
|
+
}
|
19
|
+
csv = HoneyFormat::CSV.new(csv_string, header_converter: converter)
|
20
|
+
```
|
21
|
+
* Add `encoding` option to `CSV`
|
22
|
+
|
23
|
+
## v0.19.0
|
24
|
+
|
25
|
+
* Add `method_name` as alias for `header_column` converter
|
26
|
+
* Freeze constants in `HeaderColumnConverter`
|
27
|
+
* Add support for passing a callable object in `type_map` argument [PR#49](https://github.com/buren/honey_format/pull/49)
|
28
|
+
* Remove non-printable & zero-width characters from header columns
|
29
|
+
* Add `£` to header separator character list
|
30
|
+
* Replace `@` with `_at_` in header converter
|
31
|
+
* Replace all space-like chars in Header converter. Closes [Issue#39](https://github.com/buren/honey_format/issues/39) ([PR#49](https://github.com/buren/honey_format/pull/49))
|
32
|
+
* Improved CLI output when input path is missing
|
33
|
+
|
34
|
+
## v0.18.0
|
35
|
+
|
36
|
+
* Require `set` class in `ConvertBoolean` - fixes crash when set is already required in environment
|
37
|
+
* Allow symbols to be passed to `HeaderColumnConverter`
|
38
|
+
* Replace `.` and `,` with `_` in header column converter
|
39
|
+
|
3
40
|
## v0.17.0
|
4
41
|
|
5
42
|
:warning: This release contains some backwards compatible changes.
|
data/README.md
CHANGED
@@ -27,8 +27,9 @@ Id,Username,Email
|
|
27
27
|
2,jacob,jacob@example.com
|
28
28
|
CSV
|
29
29
|
csv = HoneyFormat::CSV.new(csv_string, type_map: { id: :integer })
|
30
|
-
csv.columns # => [:id, :username]
|
31
|
-
|
30
|
+
csv.columns # => [:id, :username, :email]
|
31
|
+
csv.rows # => [#<Row id=1, username="buren", email="buren@example.com">, #<Row id=2, username="jacob", email="jacob@example.com">]
|
32
|
+
user = csv.rows.first
|
32
33
|
user.id # => 1
|
33
34
|
user.username # => "buren"
|
34
35
|
|
@@ -90,7 +91,7 @@ __Type converters__
|
|
90
91
|
|
91
92
|
> Type converters are great if you want to convert column values, like numbers and dates.
|
92
93
|
|
93
|
-
There are a
|
94
|
+
There are a bunch of [default type converters](https://github.com/buren/honey_format/blob/master/lib/honey_format/converters/converters.rb)
|
94
95
|
```ruby
|
95
96
|
csv_string = "Id,Username\n1,buren"
|
96
97
|
type_map = { id: :integer }
|
@@ -98,7 +99,15 @@ csv = HoneyFormat::CSV.new(csv_string, type_map: type_map)
|
|
98
99
|
csv.rows.first.id # => 1
|
99
100
|
```
|
100
101
|
|
101
|
-
|
102
|
+
Pass your own
|
103
|
+
```ruby
|
104
|
+
csv_string = "Id,Username\n1,buren"
|
105
|
+
type_map = { username: proc { |v| v.upcase } }
|
106
|
+
csv = HoneyFormat::CSV.new(csv_string, type_map: type_map)
|
107
|
+
csv.rows.first.username # => "BUREN"
|
108
|
+
```
|
109
|
+
|
110
|
+
Register your own converter
|
102
111
|
```ruby
|
103
112
|
HoneyFormat.configure do |config|
|
104
113
|
config.converter_registry.register :upcased, proc { |v| v.upcase }
|
@@ -125,7 +134,12 @@ decimal_converter = HoneyFormat.converter_registry[:decimal]
|
|
125
134
|
decimal_converter.call('1.1') # => 1.1
|
126
135
|
```
|
127
136
|
|
128
|
-
|
137
|
+
Default converter names
|
138
|
+
```ruby
|
139
|
+
HoneyFormat.config.default_converters.keys
|
140
|
+
```
|
141
|
+
|
142
|
+
See [`Converters::DEFAULT`](https://github.com/buren/honey_format/blob/master/lib/honey_format/converters/converters.rb) for a complete list of the default converter names.
|
129
143
|
|
130
144
|
__Row builder__
|
131
145
|
|
@@ -234,21 +248,36 @@ csv.columns # => [:ID, :USERNAME]
|
|
234
248
|
|
235
249
|
Pass your own header converter
|
236
250
|
```ruby
|
237
|
-
|
238
|
-
|
251
|
+
# unmapped keys use the default header converter,
|
252
|
+
# mix simple key => value mapping with key => proc
|
253
|
+
converter = {
|
254
|
+
'First^Name' => :first_name,
|
255
|
+
'Username' => -> { :handle }
|
256
|
+
}
|
239
257
|
|
240
|
-
csv_string = "ID,First^Name\n1,Jacob"
|
258
|
+
csv_string = "ID,Username,First^Name\n1,buren,Jacob"
|
241
259
|
user = HoneyFormat::CSV.new(csv_string, header_converter: converter).rows.first
|
242
260
|
user.first_name # => "Jacob"
|
261
|
+
user.handle # => "buren"
|
243
262
|
user.id # => "1"
|
263
|
+
|
264
|
+
# you can also pass a proc or any callable object
|
265
|
+
converter = Class.new do
|
266
|
+
define_singleton_method(:call) { |value, index| "#{value}#{index}" }
|
267
|
+
end
|
268
|
+
# or
|
269
|
+
converter = ->(value, index) { "#{value}#{index}" }
|
270
|
+
user = HoneyFormat::CSV.new(csv_string, header_converter: converter)
|
244
271
|
```
|
245
272
|
|
246
|
-
Missing header values
|
273
|
+
Missing header values are automatically set and deduplicated
|
247
274
|
```ruby
|
248
|
-
csv_string = "first,,third\nval0,val1,val2"
|
275
|
+
csv_string = "first,,third,third\nval0,val1,val2,val3"
|
249
276
|
csv = HoneyFormat::CSV.new(csv_string)
|
250
277
|
user = csv.rows.first
|
251
278
|
user.column1 # => "val1"
|
279
|
+
user.third # => "val2"
|
280
|
+
user.third1 # => "val3"
|
252
281
|
```
|
253
282
|
|
254
283
|
Duplicated header values
|
@@ -288,7 +317,7 @@ user['first^name'] # => "Jacob"
|
|
288
317
|
|
289
318
|
__Errors__
|
290
319
|
|
291
|
-
> When you need
|
320
|
+
> When you need to be extra safe.
|
292
321
|
|
293
322
|
If you want to, there are some errors you can rescue
|
294
323
|
```ruby
|
@@ -403,7 +432,7 @@ Usage: bin/benchmark [file.csv] [options]
|
|
403
432
|
--[no-]verbose Verbose output
|
404
433
|
--lines-multipliers=[1,2,10] Multiply the rows in the CSV file (default: 1)
|
405
434
|
--time=[30] Benchmark time (default: 30)
|
406
|
-
--warmup=[
|
435
|
+
--warmup=[5] Benchmark warmup (default: 5)
|
407
436
|
-h, --help How to use
|
408
437
|
```
|
409
438
|
|
data/exe/honey_format
CHANGED
@@ -10,7 +10,13 @@ require 'honey_format/cli/cli'
|
|
10
10
|
cli = HoneyFormat::CLI.new
|
11
11
|
options = cli.options
|
12
12
|
|
13
|
-
input_path = options[:input_path]
|
13
|
+
input_path = options[:input_path]
|
14
|
+
unless input_path
|
15
|
+
puts cli.usage
|
16
|
+
puts
|
17
|
+
puts '[ERROR] input path required'
|
18
|
+
exit 1
|
19
|
+
end
|
14
20
|
csv_input = File.read(input_path)
|
15
21
|
csv = HoneyFormat::CSV.new(
|
16
22
|
csv_input,
|
data/honey_format.gemspec
CHANGED
@@ -23,9 +23,9 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.required_ruby_version = '>= 2.3.0'
|
24
24
|
|
25
25
|
spec.add_development_dependency 'benchmark-ips'
|
26
|
-
spec.add_development_dependency 'bundler', '
|
26
|
+
spec.add_development_dependency 'bundler', '> 1.10', '< 3'
|
27
27
|
spec.add_development_dependency 'byebug'
|
28
|
-
spec.add_development_dependency 'rake', '~>
|
28
|
+
spec.add_development_dependency 'rake', '~> 12.3'
|
29
29
|
spec.add_development_dependency 'rspec'
|
30
30
|
spec.add_development_dependency 'simplecov'
|
31
31
|
end
|
data/lib/honey_format/cli/cli.rb
CHANGED
@@ -12,9 +12,14 @@ module HoneyFormat
|
|
12
12
|
# @return [CLI] the CLI
|
13
13
|
def initialize(argv: ARGV, io: STDOUT)
|
14
14
|
@io = io
|
15
|
+
@parser = nil
|
15
16
|
@options = parse_options(argv: argv.dup)
|
16
17
|
end
|
17
18
|
|
19
|
+
def usage
|
20
|
+
@parser.to_s
|
21
|
+
end
|
22
|
+
|
18
23
|
private
|
19
24
|
|
20
25
|
# Puts to configured IO
|
@@ -38,6 +43,8 @@ module HoneyFormat
|
|
38
43
|
type_map = {}
|
39
44
|
|
40
45
|
OptionParser.new do |parser|
|
46
|
+
@parser = parser
|
47
|
+
|
41
48
|
parser.banner = 'Usage: honey_format [options] <file.csv>'
|
42
49
|
parser.default_argv = argv
|
43
50
|
|
@@ -91,6 +98,7 @@ module HoneyFormat
|
|
91
98
|
if input_path && argv.last
|
92
99
|
raise(ArgumentError, "you can't provide both --csv and <path>")
|
93
100
|
end
|
101
|
+
|
94
102
|
input_path ||= argv.last
|
95
103
|
|
96
104
|
{
|
@@ -97,33 +97,7 @@ module HoneyFormat
|
|
97
97
|
# Default converter registry
|
98
98
|
# @return [Hash] hash with default converters
|
99
99
|
def default_converters
|
100
|
-
@default_converters ||=
|
101
|
-
# strict variants
|
102
|
-
decimal!: StrictConvertDecimal,
|
103
|
-
integer!: StrictConvertInteger,
|
104
|
-
date!: StrictConvertDate,
|
105
|
-
datetime!: StrictConvertDatetime,
|
106
|
-
symbol!: StrictConvertSymbol,
|
107
|
-
downcase!: StrictConvertDowncase,
|
108
|
-
upcase!: StrictConvertUpcase,
|
109
|
-
boolean!: StrictConvertBoolean,
|
110
|
-
# safe variants
|
111
|
-
decimal: ConvertDecimal,
|
112
|
-
decimal_or_zero: ConvertDecimalOrZero,
|
113
|
-
integer: ConvertInteger,
|
114
|
-
integer_or_zero: ConvertIntegerOrZero,
|
115
|
-
date: ConvertDate,
|
116
|
-
datetime: ConvertDatetime,
|
117
|
-
symbol: ConvertSymbol,
|
118
|
-
downcase: ConvertDowncase,
|
119
|
-
upcase: ConvertUpcase,
|
120
|
-
boolean: ConvertBoolean,
|
121
|
-
md5: ConvertMD5,
|
122
|
-
hex: ConvertHex,
|
123
|
-
nil: ConvertNil,
|
124
|
-
blank: ConvertBlank,
|
125
|
-
header_column: ConvertHeaderColumn,
|
126
|
-
}.freeze
|
100
|
+
@default_converters ||= Converters::DEFAULT
|
127
101
|
end
|
128
102
|
end
|
129
103
|
end
|
@@ -1,10 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'set'
|
4
|
+
|
3
5
|
module HoneyFormat
|
4
6
|
# String values considered truthy
|
5
|
-
TRUTHY = Set.new(%w[t T 1 y Y true TRUE]).freeze
|
7
|
+
TRUTHY = Set.new(%w[t T 1 y Y true TRUE] + [true]).freeze
|
6
8
|
# String values considered falsy
|
7
|
-
FALSY = Set.new(%w[f F 0 n N false FALSE]).freeze
|
9
|
+
FALSY = Set.new(%w[f F 0 n N false FALSE] + [false]).freeze
|
8
10
|
|
9
11
|
# Tries to convert value to boolean, returns nil if it can't convert
|
10
12
|
ConvertBoolean = proc do |v|
|
@@ -23,8 +23,8 @@ module HoneyFormat
|
|
23
23
|
end
|
24
24
|
|
25
25
|
# Convert to date or raise error
|
26
|
-
StrictConvertDate = proc { |v| Date.parse(v) }
|
26
|
+
StrictConvertDate = proc { |v| v.is_a?(Date) ? v : Date.parse(v) }
|
27
27
|
|
28
28
|
# Convert to datetime or raise error
|
29
|
-
StrictConvertDatetime = proc { |v| Time.parse(v) }
|
29
|
+
StrictConvertDatetime = proc { |v| v.is_a?(Time) ? v : Time.parse(v) }
|
30
30
|
end
|
@@ -9,4 +9,35 @@ require 'honey_format/converters/convert_string'
|
|
9
9
|
module HoneyFormat
|
10
10
|
# Convert to nil
|
11
11
|
ConvertNil = proc {}
|
12
|
+
|
13
|
+
module Converters
|
14
|
+
DEFAULT = {
|
15
|
+
# strict variants
|
16
|
+
decimal!: StrictConvertDecimal,
|
17
|
+
integer!: StrictConvertInteger,
|
18
|
+
date!: StrictConvertDate,
|
19
|
+
datetime!: StrictConvertDatetime,
|
20
|
+
symbol!: StrictConvertSymbol,
|
21
|
+
downcase!: StrictConvertDowncase,
|
22
|
+
upcase!: StrictConvertUpcase,
|
23
|
+
boolean!: StrictConvertBoolean,
|
24
|
+
# safe variants
|
25
|
+
decimal: ConvertDecimal,
|
26
|
+
decimal_or_zero: ConvertDecimalOrZero,
|
27
|
+
integer: ConvertInteger,
|
28
|
+
integer_or_zero: ConvertIntegerOrZero,
|
29
|
+
date: ConvertDate,
|
30
|
+
datetime: ConvertDatetime,
|
31
|
+
symbol: ConvertSymbol,
|
32
|
+
downcase: ConvertDowncase,
|
33
|
+
upcase: ConvertUpcase,
|
34
|
+
boolean: ConvertBoolean,
|
35
|
+
md5: ConvertMD5,
|
36
|
+
hex: ConvertHex,
|
37
|
+
nil: ConvertNil,
|
38
|
+
blank: ConvertBlank,
|
39
|
+
header_column: ConvertHeaderColumn,
|
40
|
+
method_name: ConvertHeaderColumn,
|
41
|
+
}.freeze
|
42
|
+
end
|
12
43
|
end
|
@@ -4,35 +4,48 @@ module HoneyFormat
|
|
4
4
|
# Header column converter
|
5
5
|
module HeaderColumnConverter
|
6
6
|
# Bracket character matcher
|
7
|
-
BRACKETS = /\(|\[|\{|\)|\]|\}
|
7
|
+
BRACKETS = /\(|\[|\{|\)|\]|\}/.freeze
|
8
8
|
|
9
9
|
# Separator characters
|
10
|
-
SEPS = /'|"
|
10
|
+
SEPS = /'|"|\||\*|\^|\&|%|\$|€|£|#/.freeze
|
11
|
+
|
12
|
+
# Space characters
|
13
|
+
SPACES = /[[:space:]]+/.freeze
|
14
|
+
|
15
|
+
# Non-printable characters
|
16
|
+
NON_PRINT = /[^[:print:]]/.freeze
|
17
|
+
|
18
|
+
# zero-width characters - see https://stackoverflow.com/q/50647999
|
19
|
+
ZERO_WIDTH = /[\u200B-\u200D\uFEFF]/.freeze
|
11
20
|
|
12
21
|
# Replace map
|
13
22
|
REPLACE_MAP = [
|
14
|
-
[/\\/, '/'],
|
15
|
-
[/ \(/, '('],
|
16
|
-
[/ \[/, '['],
|
17
|
-
[/ \{/, '{'],
|
18
|
-
[/ \{/, '{'],
|
19
|
-
[/\) /, ')'],
|
20
|
-
[/\] /, ']'],
|
21
|
-
[/\} /, '}'],
|
22
|
-
[
|
23
|
-
[
|
24
|
-
[
|
25
|
-
[
|
26
|
-
[
|
27
|
-
[
|
28
|
-
[/
|
29
|
-
[
|
30
|
-
[/_
|
31
|
-
|
23
|
+
[/\\/, '/'], # replace "\" with "/"
|
24
|
+
[/ \(/, '('], # replace " (" with "("
|
25
|
+
[/ \[/, '['], # replace " [" with "["
|
26
|
+
[/ \{/, '{'], # replace " {" with "{"
|
27
|
+
[/ \{/, '{'], # replace " {" with "{"
|
28
|
+
[/\) /, ')'], # replace ") " with ")"
|
29
|
+
[/\] /, ']'], # replace "] " with "]"
|
30
|
+
[/\} /, '}'], # replace "} " with "}"
|
31
|
+
[/@/, '_at_'], # replace "@" with "_at_"
|
32
|
+
[BRACKETS, '_'], # replace (, [, {, ), ] and } with "_"
|
33
|
+
[SPACES, '_'], # replace one or more space chars with "_"
|
34
|
+
[/-/, '_'], # replace "-" with "_"
|
35
|
+
[/\.|,/, '_'], # replace "." and "," with "_"
|
36
|
+
[/::/, '_'], # replace "::" with "_"
|
37
|
+
[%r{/}, '_'], # replace "/" with "_"
|
38
|
+
[SEPS, '_'], # replace separator chars with "_"
|
39
|
+
[/_+/, '_'], # replace one or more "_" with single "_"
|
40
|
+
[NON_PRINT, ''], # remove non-printable characters
|
41
|
+
[ZERO_WIDTH, ''], # remove zero-width characters
|
42
|
+
[/\A_+/, ''], # remove leading "_"
|
43
|
+
[/_+\z/, ''], # remove trailing "_"
|
44
|
+
].map { |e| e.map(&:freeze).freeze }.freeze
|
32
45
|
|
33
46
|
# Returns converted value and mutates the argument.
|
34
47
|
# @return [Symbol] the cleaned header column.
|
35
|
-
# @param [String] column the string to be cleaned.
|
48
|
+
# @param [String, Symbol] column the string to be cleaned.
|
36
49
|
# @param [Integer] index the column index.
|
37
50
|
# @example Convert simple header
|
38
51
|
# HeaderColumnConverter.call(" User name ") #=> "user_name"
|
@@ -41,10 +54,11 @@ module HoneyFormat
|
|
41
54
|
def self.call(column, index = nil)
|
42
55
|
if column.nil? || column.empty?
|
43
56
|
raise(ArgumentError, "column and column index can't be blank/nil") unless index
|
57
|
+
|
44
58
|
return :"column#{index}"
|
45
59
|
end
|
46
60
|
|
47
|
-
column = column.dup
|
61
|
+
column = column.to_s.dup
|
48
62
|
column.strip!
|
49
63
|
column.downcase!
|
50
64
|
REPLACE_MAP.each do |data|
|
data/lib/honey_format/csv.rb
CHANGED
@@ -19,6 +19,7 @@ module HoneyFormat
|
|
19
19
|
# @param header_deduplicator [#call] deduplicates header columns.
|
20
20
|
# @param row_builder [#call] will be called for each parsed row.
|
21
21
|
# @param type_map [Hash] map of column_name => type conversion to perform.
|
22
|
+
# @param encoding [String] CSV encoding (for example "BOM|UTF-16LE:UTF-8").
|
22
23
|
# @param skip_lines [Regexp, String]
|
23
24
|
# Regexp for determining whether a line is a comment. See CSV skip_lines option.
|
24
25
|
# @raise [HeaderError] super class of errors raised when there is a CSV header error.
|
@@ -46,6 +47,8 @@ module HoneyFormat
|
|
46
47
|
# @example Skip all lines starting with '#'
|
47
48
|
# csv = HoneyFormat::CSV.new("name,id\n# some comment\njacob,1", skip_lines: '#')
|
48
49
|
# csv.rows.length # => 1
|
50
|
+
# @example CSV encoding
|
51
|
+
# csv = HoneyFormat::CSV.new(csv_string, encoding: "BOM|UTF-16LE:UTF-8")
|
49
52
|
# @see Matrix#new
|
50
53
|
def initialize(
|
51
54
|
csv,
|
@@ -56,6 +59,7 @@ module HoneyFormat
|
|
56
59
|
header_converter: HoneyFormat.header_converter,
|
57
60
|
header_deduplicator: HoneyFormat.config.header_deduplicator,
|
58
61
|
row_builder: nil,
|
62
|
+
encoding: nil,
|
59
63
|
type_map: {},
|
60
64
|
skip_lines: HoneyFormat.config.skip_lines
|
61
65
|
)
|
@@ -65,7 +69,8 @@ module HoneyFormat
|
|
65
69
|
row_sep: row_delimiter,
|
66
70
|
quote_char: quote_character,
|
67
71
|
skip_blanks: true,
|
68
|
-
skip_lines: skip_lines
|
72
|
+
skip_lines: skip_lines,
|
73
|
+
encoding: encoding
|
69
74
|
)
|
70
75
|
super(
|
71
76
|
csv,
|
@@ -10,10 +10,11 @@ module HoneyFormat
|
|
10
10
|
# Instantiate a Header
|
11
11
|
# @return [Header] a new instance of Header.
|
12
12
|
# @param [Array<String>] header array of strings.
|
13
|
-
# @param converter [#call, Symbol]
|
13
|
+
# @param converter [#call, Symbol, Hash]
|
14
14
|
# header converter that implements a #call method
|
15
15
|
# that takes one column (string) argument OR symbol for a registered
|
16
|
-
# converter registry
|
16
|
+
# converter registry OR a hash mapped to a symbol or something that responds
|
17
|
+
# to #call.
|
17
18
|
# @param deduplicator [#call, Symbol]
|
18
19
|
# header deduplicator that implements a #call method
|
19
20
|
# that takes columns Array<String> argument OR symbol for a registered
|
@@ -93,7 +94,7 @@ module HoneyFormat
|
|
93
94
|
private
|
94
95
|
|
95
96
|
# Set the header converter
|
96
|
-
# @param [Symbol, #call] symbol to known converter
|
97
|
+
# @param [Hash, Symbol, #call] symbol to known converter, object that responds to #call or Hash
|
97
98
|
# @return [nil]
|
98
99
|
def converter=(object)
|
99
100
|
if object.is_a?(Symbol)
|
@@ -101,6 +102,11 @@ module HoneyFormat
|
|
101
102
|
return
|
102
103
|
end
|
103
104
|
|
105
|
+
if object.is_a?(Hash)
|
106
|
+
@converter = hash_converter(object)
|
107
|
+
return
|
108
|
+
end
|
109
|
+
|
104
110
|
@converter = object
|
105
111
|
end
|
106
112
|
|
@@ -134,20 +140,17 @@ module HoneyFormat
|
|
134
140
|
# @param [Integer] index the CSV header column index
|
135
141
|
# @return [Symbol] the converted column
|
136
142
|
def convert_column(column, index)
|
137
|
-
|
138
|
-
@converter.call(column)
|
139
|
-
else
|
140
|
-
@converter.call(column, index)
|
141
|
-
end
|
142
|
-
value.to_sym
|
143
|
+
call_column_builder(@converter, column, index)&.to_sym
|
143
144
|
end
|
144
145
|
|
145
|
-
# Returns the
|
146
|
-
# @
|
147
|
-
|
146
|
+
# Returns the callable object method arity
|
147
|
+
# @param [#arity, #call] callable thing that responds to #call and maybe #arity
|
148
|
+
# @return [Integer] the method arity
|
149
|
+
def callable_arity(callable)
|
148
150
|
# procs and lambdas respond to #arity
|
149
|
-
return
|
150
|
-
|
151
|
+
return callable.arity if callable.respond_to?(:arity)
|
152
|
+
|
153
|
+
callable.method(:call).arity
|
151
154
|
end
|
152
155
|
|
153
156
|
# Raises an error if header column is missing/empty
|
@@ -163,5 +166,29 @@ module HoneyFormat
|
|
163
166
|
]
|
164
167
|
raise(Errors::MissingHeaderColumnError, parts.join(' '))
|
165
168
|
end
|
169
|
+
|
170
|
+
def hash_converter(hash)
|
171
|
+
lambda { |value, index|
|
172
|
+
# support strings and symbol keys interchangeably
|
173
|
+
column = hash.fetch(value) do
|
174
|
+
key = value.respond_to?(:to_sym) ? value.to_sym : value
|
175
|
+
# if column is unmapped use the default header converter
|
176
|
+
hash.fetch(key) { HoneyFormat.header_converter.call(value, index) }
|
177
|
+
end
|
178
|
+
|
179
|
+
# The hash can contain mixed values, Symbol and procs
|
180
|
+
if column.respond_to?(:call)
|
181
|
+
column = call_column_builder(column, value, index)
|
182
|
+
end
|
183
|
+
|
184
|
+
column&.to_sym
|
185
|
+
}
|
186
|
+
end
|
187
|
+
|
188
|
+
def call_column_builder(callable, value, index)
|
189
|
+
return callable.call if callable_arity(callable).zero?
|
190
|
+
return callable.call(value) if callable_arity(callable) == 1
|
191
|
+
callable.call(value, index)
|
192
|
+
end
|
166
193
|
end
|
167
194
|
end
|
@@ -4,7 +4,7 @@ module HoneyFormat
|
|
4
4
|
# Default row builder
|
5
5
|
class Row < Struct
|
6
6
|
# Create a row
|
7
|
-
# @return [
|
7
|
+
# @return [Row] returns an instantiated Row
|
8
8
|
# @example
|
9
9
|
# row_klass = Row.new(:id, :username)
|
10
10
|
# row = row_klass.call('1', 'buren')
|
@@ -39,6 +39,7 @@ module HoneyFormat
|
|
39
39
|
build_row!(row)
|
40
40
|
rescue ArgumentError => e
|
41
41
|
raise unless e.message == 'struct size differs'
|
42
|
+
|
42
43
|
raise_invalid_row_length!(e, row)
|
43
44
|
end
|
44
45
|
|
@@ -60,6 +61,7 @@ module HoneyFormat
|
|
60
61
|
end
|
61
62
|
|
62
63
|
return row unless @builder
|
64
|
+
|
63
65
|
@builder.call(row)
|
64
66
|
end
|
65
67
|
|
@@ -40,6 +40,12 @@ module HoneyFormat
|
|
40
40
|
@rows
|
41
41
|
end
|
42
42
|
|
43
|
+
# Return element at given position.
|
44
|
+
# @return [Row] the row at the given position.
|
45
|
+
def [](index)
|
46
|
+
@rows[index]
|
47
|
+
end
|
48
|
+
|
43
49
|
# Return the number of rows
|
44
50
|
# @return [Integer] the number of rows
|
45
51
|
def length
|
@@ -78,6 +84,7 @@ module HoneyFormat
|
|
78
84
|
rows.each do |row|
|
79
85
|
# ignore empty rows
|
80
86
|
next if row.nil? || row.empty? || row == [nil]
|
87
|
+
|
81
88
|
built_rows << builder.build(row)
|
82
89
|
end
|
83
90
|
built_rows
|
@@ -38,9 +38,11 @@ module HoneyFormat
|
|
38
38
|
end
|
39
39
|
|
40
40
|
# Call value type
|
41
|
-
# @param [Symbol, String] type the name of the type
|
41
|
+
# @param [Symbol, String, #call] type the name of the type
|
42
42
|
# @param [Object] value to be converted
|
43
43
|
def call(value, type)
|
44
|
+
return type.call(value) if type.respond_to?(:call)
|
45
|
+
|
44
46
|
self[type].call(value)
|
45
47
|
end
|
46
48
|
|
@@ -72,6 +74,7 @@ module HoneyFormat
|
|
72
74
|
# @return [true, false] true if type exists, false otherwise
|
73
75
|
def type?(type)
|
74
76
|
return false unless keyable?(type)
|
77
|
+
|
75
78
|
@callers.key?(to_key(type))
|
76
79
|
end
|
77
80
|
|
data/lib/honey_format/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: honey_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jacob Burenstam
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: benchmark-ips
|
@@ -28,16 +28,22 @@ dependencies:
|
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '1.10'
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '3'
|
34
37
|
type: :development
|
35
38
|
prerelease: false
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
37
40
|
requirements:
|
38
|
-
- - "
|
41
|
+
- - ">"
|
39
42
|
- !ruby/object:Gem::Version
|
40
43
|
version: '1.10'
|
44
|
+
- - "<"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '3'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: byebug
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,14 +64,14 @@ dependencies:
|
|
58
64
|
requirements:
|
59
65
|
- - "~>"
|
60
66
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
67
|
+
version: '12.3'
|
62
68
|
type: :development
|
63
69
|
prerelease: false
|
64
70
|
version_requirements: !ruby/object:Gem::Requirement
|
65
71
|
requirements:
|
66
72
|
- - "~>"
|
67
73
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
74
|
+
version: '12.3'
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
76
|
name: rspec
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -162,8 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
168
|
- !ruby/object:Gem::Version
|
163
169
|
version: '0'
|
164
170
|
requirements: []
|
165
|
-
|
166
|
-
rubygems_version: 2.7.6
|
171
|
+
rubygems_version: 3.0.3
|
167
172
|
signing_key:
|
168
173
|
specification_version: 4
|
169
174
|
summary: Makes working with CSVs as smooth as honey.
|