honey_format 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +184 -66
- data/bin/benchmark +23 -52
- data/exe/honey_format +20 -59
- data/honey_format.gemspec +2 -2
- data/lib/honey_format.rb +31 -0
- data/lib/honey_format/cli/benchmark_cli.rb +124 -0
- data/lib/honey_format/cli/cli.rb +81 -0
- data/lib/honey_format/cli/result_writer.rb +38 -0
- data/lib/honey_format/configuration.rb +26 -0
- data/lib/honey_format/csv.rb +49 -52
- data/lib/honey_format/errors.rb +6 -2
- data/lib/honey_format/header.rb +27 -29
- data/lib/honey_format/{convert_header_value.rb → header_column_converter.rb} +9 -6
- data/lib/honey_format/matrix.rb +90 -0
- data/lib/honey_format/row.rb +1 -1
- data/lib/honey_format/row_builder.rb +27 -6
- data/lib/honey_format/rows.rb +12 -5
- data/lib/honey_format/value_converter.rb +112 -0
- data/lib/honey_format/version.rb +1 -1
- metadata +12 -6
data/exe/honey_format
CHANGED
@@ -4,65 +4,26 @@
|
|
4
4
|
require 'bundler/setup' if ENV['HONEY_FORMAT_GEM_DEV']
|
5
5
|
require 'honey_format'
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
parser.on("--columns=id,name", Array, "Select columns.") do |value|
|
29
|
-
columns = value
|
30
|
-
end
|
31
|
-
|
32
|
-
parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
|
33
|
-
output_path = value
|
34
|
-
end
|
35
|
-
|
36
|
-
parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
|
37
|
-
delimiter = value
|
38
|
-
end
|
39
|
-
|
40
|
-
parser.on("-h", "--help", "How to use") do
|
41
|
-
puts parser
|
42
|
-
exit
|
43
|
-
end
|
44
|
-
|
45
|
-
parser.on_tail('--version', 'Show version') do
|
46
|
-
puts "HoneyFormat version #{HoneyFormat::VERSION}"
|
47
|
-
exit
|
48
|
-
end
|
49
|
-
|
50
|
-
# No argument, shows at tail. This will print an options summary.
|
51
|
-
parser.on_tail("-h", "--help", "Show this message") do
|
52
|
-
puts parser
|
53
|
-
exit
|
54
|
-
end
|
55
|
-
end.parse!
|
56
|
-
|
57
|
-
csv = HoneyFormat::CSV.new(File.read(input_path), delimiter: delimiter)
|
58
|
-
if header_only
|
59
|
-
puts csv.columns.join(',')
|
60
|
-
exit
|
61
|
-
end
|
62
|
-
|
63
|
-
csv_string = csv.to_csv(columns: columns&.map(&:to_sym))
|
64
|
-
if output_path
|
65
|
-
File.write(output_path, csv_string)
|
7
|
+
require 'honey_format/cli/cli'
|
8
|
+
|
9
|
+
cli = HoneyFormat::CLI.new
|
10
|
+
options = cli.options
|
11
|
+
|
12
|
+
input_path = options[:input_path] || raise(ArgumentError, 'input path required')
|
13
|
+
csv_input = File.read(input_path)
|
14
|
+
csv = HoneyFormat::CSV.new(csv_input, delimiter: options[:delimiter])
|
15
|
+
|
16
|
+
csv_part = if options[:header_only]
|
17
|
+
csv.header
|
18
|
+
elsif options[:rows_only]
|
19
|
+
csv.rows
|
20
|
+
else
|
21
|
+
csv
|
22
|
+
end
|
23
|
+
|
24
|
+
csv_string = csv_part.to_csv(columns: options[:columns])
|
25
|
+
if path = options[:output_path]
|
26
|
+
File.write(path, csv_string)
|
66
27
|
else
|
67
28
|
puts csv_string
|
68
29
|
end
|
data/honey_format.gemspec
CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ['Jacob Burenstam']
|
10
10
|
spec.email = ['burenstam@gmail.com']
|
11
11
|
|
12
|
-
spec.summary = '
|
13
|
-
spec.description = '
|
12
|
+
spec.summary = 'Makes working with CSVs as smooth as honey.'
|
13
|
+
spec.description = 'Proper objects for CSV headers and rows, convert column values, filter columns and rows, small(-ish) perfomance overhead, no dependencies other than Ruby stdlib.'
|
14
14
|
spec.homepage = 'https://github.com/buren/honey_format'
|
15
15
|
spec.license = 'MIT'
|
16
16
|
|
data/lib/honey_format.rb
CHANGED
@@ -1,9 +1,40 @@
|
|
1
1
|
require 'honey_format/version'
|
2
|
+
require 'honey_format/configuration'
|
2
3
|
require 'honey_format/errors'
|
4
|
+
require 'honey_format/value_converter'
|
3
5
|
require 'honey_format/csv'
|
4
6
|
|
7
|
+
|
5
8
|
# Main module for HoneyFormat
|
6
9
|
module HoneyFormat
|
7
10
|
# CSV alias
|
8
11
|
HoneyCSV = CSV
|
12
|
+
|
13
|
+
# Configure HoneyFormat
|
14
|
+
# @yield [configuration] the configuration
|
15
|
+
# @yieldparam [Configuration] current configuration
|
16
|
+
# @return [Configuration] current configuration
|
17
|
+
def self.configure
|
18
|
+
@configuration ||= Configuration.new
|
19
|
+
yield(@configuration) if block_given?
|
20
|
+
@configuration
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns the current configuration
|
24
|
+
# @return [Configuration] current configuration
|
25
|
+
def self.config
|
26
|
+
configure
|
27
|
+
end
|
28
|
+
|
29
|
+
# Returns the configured header converter
|
30
|
+
# @return [#call] the current header converter
|
31
|
+
def self.header_converter
|
32
|
+
config.header_converter
|
33
|
+
end
|
34
|
+
|
35
|
+
# Returns the configured value converter
|
36
|
+
# @return [#call] the current value converter
|
37
|
+
def self.value_converter
|
38
|
+
config.converter
|
39
|
+
end
|
9
40
|
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'honey_format/cli/result_writer'
|
3
|
+
|
4
|
+
module HoneyFormat
  # Benchmark CLI
  # @attr_reader [Hash] options from command line arguments
  # @attr_reader [CLIResultWriter] writer the CLI result writer
  class BenchmarkCLI
    # CSV default test data location
    CSV_TEST_DATA_URL = 'https://gist.github.com/buren/b669dd82fa37e37672da2cab33c8a830/raw/54ba14a698941ff61f3b854b66df0a7782c79c85/csv_1000_rows.csv'
    # CSV default test data cache location
    CSV_TEST_DATA_CACHE_PATH = '/tmp/honey-format-benchmark-test.csv'

    attr_reader :writer, :options

    # Instantiate the CLI; options are parsed from the process ARGV
    # @param writer [CLIResultWriter] the result writer to use
    def initialize(writer: CLIResultWriter.new)
      @used_input_path = nil
      @writer = writer
      @options = parse_options(argv: ARGV)
      writer.verbose = true if @options[:verbose]
    end

    # Returns the expected runtime in seconds
    # @param report_count [Integer] number of reports in benchmark
    # @return [Integer] expected runtime in seconds
    def expected_runtime_seconds(report_count:)
      runs = report_count * options[:lines_multipliers].length
      warmup_time_seconds = runs * options[:benchmark_warmup]
      bench_time_seconds = runs * options[:benchmark_time]

      warmup_time_seconds + bench_time_seconds
    end

    # Return the input path used for the benchmark
    # @return [String, nil] the input path (URL or filepath), nil if no
    #   path was given and the default CSV has not been fetched yet
    def used_input_path
      options[:input_path] || @used_input_path
    end

    # Download or fetch the default benchmark file from cache
    # @return [String] CSV file as a string
    def fetch_default_benchmark_csv
      cache_path = CSV_TEST_DATA_CACHE_PATH

      # File.exist? instead of File.exists?, which was removed in Ruby 3.2
      if File.exist?(cache_path)
        writer.puts "Cache file found at #{cache_path}.", verbose: true
        @used_input_path = cache_path
        return File.read(cache_path)
      end

      writer.print 'Downloading test data file from GitHub..', verbose: true
      # Lazy-loaded on purpose: only needed when the cache is cold
      require 'open-uri'
      # Kernel#open no longer opens URLs on Ruby >= 3.0; go through URI instead
      URI.parse(CSV_TEST_DATA_URL).open.read.tap do |csv|
        @used_input_path = CSV_TEST_DATA_URL
        writer.puts 'done!', verbose: true
        File.write(cache_path, csv)
        writer.puts "Wrote cache file to #{cache_path}..", verbose: true
      end
    end

    # Parse command line arguments and return options
    # @param [Array<String>] argv the command line arguments; parsed options
    #   are removed from this array (OptionParser#parse!)
    # @return [Hash] the command line options
    # @raise [ArgumentError] if a value is not an integer or a multiplier is below 1
    def parse_options(argv:)
      input_path = nil
      benchmark_time = 30
      benchmark_warmup = 5
      lines_multipliers = [1]
      verbose = false

      OptionParser.new do |parser|
        parser.banner = "Usage: bin/benchmark [file.csv] [options]"
        # Parse the arguments we were handed, not the global ARGV
        parser.default_argv = argv

        parser.on("--csv=[file1.csv]", String, "CSV file(s)") do |value|
          input_path = value
        end

        parser.on("--[no-]verbose", "Verbose output") do |value|
          verbose = value
        end

        parser.on("--lines-multipliers=[1,10,50]", Array, "Multiply the rows in the CSV file (default: 1)") do |value|
          lines_multipliers = value.map do |v|
            Integer(v).tap do |int|
              unless int >= 1
                raise(ArgumentError, '--lines-multipliers must be 1 or greater')
              end
            end
          end
        end

        parser.on("--time=[30]", String, "Benchmark time (default: 30)") do |value|
          benchmark_time = Integer(value)
        end

        parser.on("--warmup=[5]", String, "Benchmark warmup (default: 5)") do |value|
          benchmark_warmup = Integer(value)
        end

        parser.on("-h", "--help", "How to use") do
          puts parser
          exit
        end

        # No argument, shows at tail. This will print an options summary.
        parser.on_tail("-h", "--help", "Show this message") do
          puts parser
          exit
        end
      end.parse!

      {
        input_path: input_path,
        benchmark_time: benchmark_time,
        benchmark_warmup: benchmark_warmup,
        lines_multipliers: lines_multipliers,
        verbose: verbose,
      }
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module HoneyFormat
  # Main CLI
  # @attr_reader [Hash] options from command line arguments
  class CLI
    attr_reader :options

    # Instantiate the CLI; options are parsed from the process ARGV
    # @return [CLI] the CLI
    def initialize
      @options = parse_options(argv: ARGV)
    end

    # Parse command line arguments and return options.
    #
    # The input path is taken from --csv when given, otherwise from the first
    # positional (non-option) argument, so options may appear before the file.
    # @param [Array<String>] argv the command line arguments; parsed options
    #   are removed from this array (OptionParser#parse!)
    # @return [Hash] the command line options
    def parse_options(argv:)
      input_path = nil
      columns = nil
      output_path = nil
      delimiter = ','
      header_only = false
      rows_only = false

      option_parser = OptionParser.new do |parser|
        parser.banner = "Usage: honey_format [file.csv] [options]"

        parser.on("--csv=input.csv", String, "CSV file") do |value|
          input_path = value
        end

        parser.on("--columns=id,name", Array, "Select columns") do |value|
          columns = value&.map(&:to_sym)
        end

        parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
          output_path = value
        end

        parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
          delimiter = value
        end

        parser.on("--[no-]header-only", "Print only the header") do |value|
          header_only = value
        end

        parser.on("--[no-]rows-only", "Print only the rows") do |value|
          rows_only = value
        end

        parser.on("-h", "--help", "How to use") do
          puts parser
          exit
        end

        parser.on_tail('--version', 'Show version') do
          puts "HoneyFormat version #{HoneyFormat::VERSION}"
          exit
        end

        # No argument, shows at tail. This will print an options summary.
        parser.on_tail("-h", "--help", "Show this message") do
          puts parser
          exit
        end
      end

      # Parse the arguments we were handed (not the global ARGV); what is left
      # after option parsing are the positional arguments.
      positional = option_parser.parse!(argv)
      # An explicit --csv wins; otherwise fall back to the first positional
      # argument, which can never be an option flag at this point.
      input_path ||= positional.first

      {
        input_path: input_path,
        columns: columns,
        output_path: output_path,
        delimiter: delimiter,
        header_only: header_only,
        rows_only: rows_only,
      }
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module HoneyFormat
  # Writes CLI output to standard out, optionally hiding verbose-only messages
  # @attr_reader [true, false] verbose the writer mode
  class CLIResultWriter
    attr_accessor :verbose

    # Instantiate the result writer
    # @param verbose [true, false] mode (default: false)
    # @return [CLIResultWriter] the result writer
    def initialize(verbose: false)
      self.verbose = verbose
    end

    # Return if verbose mode is true/false
    # @return [true, false]
    def verbose?
      verbose
    end

    # Print the string (no trailing newline)
    # @param [String] string to print
    # @param verbose [true, false] when true, only printed in verbose mode (default: false)
    def print(string, verbose: false)
      Kernel.print(string) unless suppressed?(verbose)
    end

    # Puts the string (with trailing newline)
    # @param [String] string to puts
    # @param verbose [true, false] when true, only printed in verbose mode (default: false)
    def puts(string, verbose: false)
      Kernel.puts(string) unless suppressed?(verbose)
    end

    private

    # A message is hidden when it is verbose-only and the writer is not verbose
    def suppressed?(verbose_only)
      verbose_only && !verbose?
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module HoneyFormat
  # Holds HoneyFormat configuration
  # @attr_reader [#call] header_converter the configured header converter
  # @attr_reader [#call] converter the configured value converter
  # @attr_writer [#call] header_converter to use
  # @attr_writer [#call] converter the value converter to use
  class Configuration
    attr_reader :header_converter
    attr_accessor :converter

    # Instantiate configuration with a fresh ValueConverter; the header
    # converter defaults to that converter's :header_column entry.
    def initialize
      @converter = ValueConverter.new
      @header_converter = @converter[:header_column]
    end

    # Set the header converter
    # @param [Symbol, #call] converter for registered value converter or object that responds to #call
    # @return [#call] the header converter
    def header_converter=(converter)
      # A Symbol is looked up in the current value converter; anything else
      # is stored as-is.
      @header_converter =
        case converter
        when Symbol then @converter[converter]
        else converter
        end
    end
  end
end
|
data/lib/honey_format/csv.rb
CHANGED
@@ -1,72 +1,69 @@
|
|
1
1
|
require 'csv'
|
2
2
|
|
3
|
-
require 'honey_format/
|
4
|
-
require 'honey_format/
|
3
|
+
require 'honey_format/matrix'
|
4
|
+
# require 'honey_format/rows'
|
5
|
+
# require 'honey_format/header'
|
5
6
|
|
6
7
|
module HoneyFormat
|
7
8
|
# Represents CSV.
|
8
|
-
class CSV
|
9
|
+
class CSV < Matrix
|
9
10
|
# Instantiate CSV.
|
10
11
|
# @return [CSV] a new instance of CSV.
|
11
12
|
# @param [String] csv the CSV string
|
12
|
-
# @param [String] delimiter the CSV delimiter
|
13
|
+
# @param [String] delimiter the CSV column delimiter
|
14
|
+
# @param [String, Symbol] row_delimiter the CSV row delimiter (default: :auto)
|
15
|
+
# @param [String] quote_character the CSV quote character (default: ")
|
13
16
|
# @param [Array<String>] header optional argument that represents CSV header, required if the CSV file lacks a header row.
|
14
|
-
# @param [Array<Symbol>] valid_columns array of symbols representing valid columns, if empty all will be considered valid.
|
15
17
|
# @param [#call] header_converter converts header columns.
|
16
18
|
# @param [#call] row_builder will be called for each parsed row.
|
19
|
+
# @param type_map [Hash] map of column_name => type conversion to perform.
|
17
20
|
# @raise [HeaderError] super class of errors raised when there is a CSV header error.
|
18
21
|
# @raise [MissingHeaderError] raised when header is missing (empty or nil).
|
19
22
|
# @raise [MissingHeaderColumnError] raised when header column is missing.
|
20
|
-
# @raise [UnknownHeaderColumnError] raised when column is not in valid list.
|
21
23
|
# @raise [RowError] super class of errors raised when there is a row error.
|
22
24
|
# @raise [EmptyRowColumnsError] raised when row columns are empty.
|
23
25
|
# @raise [InvalidRowLengthError] raised when row has more columns than header columns.
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
#
|
32
|
-
# @
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
#
|
38
|
-
#
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
# csv.to_csv { |row| row.country == 'Sweden' }
|
66
|
-
# @example with both selected columns and rows
|
67
|
-
# csv.to_csv(columns: [:id, :country]) { |row| row.country == 'Sweden' }
|
68
|
-
def to_csv(columns: nil, &block)
|
69
|
-
@header.to_csv(columns: columns) + @rows.to_csv(columns: columns, &block)
|
26
|
+
# @example
|
27
|
+
# csv = HoneyFormat::CSV.new(csv_string)
|
28
|
+
# @example With custom delimiter
|
29
|
+
# csv = HoneyFormat::CSV.new(csv_string, delimiter: ';')
|
30
|
+
# @example With custom header converter
|
31
|
+
# converter = proc { |v| v == 'name' ? 'first_name' : v }
|
32
|
+
# csv = HoneyFormat::CSV.new("name,id", header_converter: converter)
|
33
|
+
# csv.columns # => [:first_name, :id]
|
34
|
+
# @example Handle errors
|
35
|
+
# begin
|
36
|
+
# csv = HoneyFormat::CSV.new(csv_string)
|
37
|
+
# rescue HoneyFormat::HeaderError => e
|
38
|
+
# puts "header error: #{e.class}, #{e.message}"
|
39
|
+
# rescue HoneyFormat::RowError => e
|
40
|
+
# puts "row error: #{e.class}, #{e.message}"
|
41
|
+
# end
|
42
|
+
# @see Matrix#new
|
43
|
+
def initialize(
|
44
|
+
csv,
|
45
|
+
delimiter: ',',
|
46
|
+
row_delimiter: :auto,
|
47
|
+
quote_character: '"',
|
48
|
+
header: nil,
|
49
|
+
header_converter: HoneyFormat.header_converter,
|
50
|
+
row_builder: nil,
|
51
|
+
type_map: {}
|
52
|
+
)
|
53
|
+
csv = ::CSV.parse(
|
54
|
+
csv,
|
55
|
+
col_sep: delimiter,
|
56
|
+
row_sep: row_delimiter,
|
57
|
+
quote_char: quote_character,
|
58
|
+
skip_blanks: true
|
59
|
+
)
|
60
|
+
super(
|
61
|
+
csv,
|
62
|
+
header: header,
|
63
|
+
header_converter: header_converter,
|
64
|
+
row_builder: row_builder,
|
65
|
+
type_map: type_map
|
66
|
+
)
|
70
67
|
end
|
71
68
|
end
|
72
69
|
end
|