honey_format 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +184 -66
- data/bin/benchmark +23 -52
- data/exe/honey_format +20 -59
- data/honey_format.gemspec +2 -2
- data/lib/honey_format.rb +31 -0
- data/lib/honey_format/cli/benchmark_cli.rb +124 -0
- data/lib/honey_format/cli/cli.rb +81 -0
- data/lib/honey_format/cli/result_writer.rb +38 -0
- data/lib/honey_format/configuration.rb +26 -0
- data/lib/honey_format/csv.rb +49 -52
- data/lib/honey_format/errors.rb +6 -2
- data/lib/honey_format/header.rb +27 -29
- data/lib/honey_format/{convert_header_value.rb → header_column_converter.rb} +9 -6
- data/lib/honey_format/matrix.rb +90 -0
- data/lib/honey_format/row.rb +1 -1
- data/lib/honey_format/row_builder.rb +27 -6
- data/lib/honey_format/rows.rb +12 -5
- data/lib/honey_format/value_converter.rb +112 -0
- data/lib/honey_format/version.rb +1 -1
- metadata +12 -6
data/exe/honey_format
CHANGED
@@ -4,65 +4,26 @@
|
|
4
4
|
require 'bundler/setup' if ENV['HONEY_FORMAT_GEM_DEV']
|
5
5
|
require 'honey_format'
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
parser.on("--columns=id,name", Array, "Select columns.") do |value|
|
29
|
-
columns = value
|
30
|
-
end
|
31
|
-
|
32
|
-
parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
|
33
|
-
output_path = value
|
34
|
-
end
|
35
|
-
|
36
|
-
parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
|
37
|
-
delimiter = value
|
38
|
-
end
|
39
|
-
|
40
|
-
parser.on("-h", "--help", "How to use") do
|
41
|
-
puts parser
|
42
|
-
exit
|
43
|
-
end
|
44
|
-
|
45
|
-
parser.on_tail('--version', 'Show version') do
|
46
|
-
puts "HoneyFormat version #{HoneyFormat::VERSION}"
|
47
|
-
exit
|
48
|
-
end
|
49
|
-
|
50
|
-
# No argument, shows at tail. This will print an options summary.
|
51
|
-
parser.on_tail("-h", "--help", "Show this message") do
|
52
|
-
puts parser
|
53
|
-
exit
|
54
|
-
end
|
55
|
-
end.parse!
|
56
|
-
|
57
|
-
csv = HoneyFormat::CSV.new(File.read(input_path), delimiter: delimiter)
|
58
|
-
if header_only
|
59
|
-
puts csv.columns.join(',')
|
60
|
-
exit
|
61
|
-
end
|
62
|
-
|
63
|
-
csv_string = csv.to_csv(columns: columns&.map(&:to_sym))
|
64
|
-
if output_path
|
65
|
-
File.write(output_path, csv_string)
|
7
|
+
require 'honey_format/cli/cli'
|
8
|
+
|
9
|
+
# Read the command line options
options = HoneyFormat::CLI.new.options

# An input file is mandatory
source_path = options[:input_path]
raise(ArgumentError, 'input path required') unless source_path

csv = HoneyFormat::CSV.new(File.read(source_path), delimiter: options[:delimiter])

# Choose which part of the CSV to output: header only, rows only or everything
selected =
  if options[:header_only] then csv.header
  elsif options[:rows_only] then csv.rows
  else csv
  end

output = selected.to_csv(columns: options[:columns])

# Write to the requested file, or to STDOUT when no output path was given
destination = options[:output_path]
if destination
  File.write(destination, output)
else
  puts output
end
|
data/honey_format.gemspec
CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ['Jacob Burenstam']
|
10
10
|
spec.email = ['burenstam@gmail.com']
|
11
11
|
|
12
|
-
spec.summary = '
|
13
|
-
spec.description = '
|
12
|
+
spec.summary = 'Makes working with CSVs as smooth as honey.'
|
13
|
+
spec.description = 'Proper objects for CSV headers and rows, convert column values, filter columns and rows, small(-ish) performance overhead, no dependencies other than Ruby stdlib.'
|
14
14
|
spec.homepage = 'https://github.com/buren/honey_format'
|
15
15
|
spec.license = 'MIT'
|
16
16
|
|
data/lib/honey_format.rb
CHANGED
@@ -1,9 +1,40 @@
|
|
1
1
|
require 'honey_format/version'
|
2
|
+
require 'honey_format/configuration'
|
2
3
|
require 'honey_format/errors'
|
4
|
+
require 'honey_format/value_converter'
|
3
5
|
require 'honey_format/csv'
|
4
6
|
|
7
|
+
|
5
8
|
# Top-level namespace for HoneyFormat
module HoneyFormat
  # Alias for the CSV class
  HoneyCSV = CSV

  class << self
    # Configure HoneyFormat, the configuration object is created on first use
    # @yield [configuration] the configuration
    # @yieldparam configuration [Configuration] current configuration
    # @return [Configuration] current configuration
    def configure
      (@configuration ||= Configuration.new).tap do |current|
        yield(current) if block_given?
      end
    end

    # Returns the current configuration
    # @return [Configuration] current configuration
    def config
      configure
    end

    # Returns the configured header converter
    # @return [#call] the current header converter
    def header_converter
      config.header_converter
    end

    # Returns the configured value converter
    # @return [#call] the current value converter
    def value_converter
      config.converter
    end
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'honey_format/cli/result_writer'
|
3
|
+
|
4
|
+
module HoneyFormat
  # Benchmark CLI
  # @attr_reader [Hash] options from command line arguments
  # @attr_reader [CLIResultWriter] writer the CLI result writer
  class BenchmarkCLI
    # CSV default test data location
    CSV_TEST_DATA_URL = 'https://gist.github.com/buren/b669dd82fa37e37672da2cab33c8a830/raw/54ba14a698941ff61f3b854b66df0a7782c79c85/csv_1000_rows.csv'
    # CSV default test data cache location
    CSV_TEST_DATA_CACHE_PATH = '/tmp/honey-format-benchmark-test.csv'

    attr_reader :writer, :options

    # Instantiate the CLI, the options are parsed from ARGV
    # @param writer [CLIResultWriter] the result writer to use
    def initialize(writer: CLIResultWriter.new)
      @used_input_path = nil
      @writer = writer
      @options = parse_options(argv: ARGV)
      writer.verbose = true if @options[:verbose]
    end

    # Returns the expected runtime in seconds
    # @param report_count [Integer] number of reports in benchmark
    # @return [Integer] expected runtime in seconds
    def expected_runtime_seconds(report_count:)
      runs = report_count * options[:lines_multipliers].length
      warmup_time_seconds = runs * options[:benchmark_warmup]
      bench_time_seconds = runs * options[:benchmark_time]

      warmup_time_seconds + bench_time_seconds
    end

    # Return the input path used for the benchmark
    # @return [String] the input path (URL or filepath)
    def used_input_path
      options[:input_path] || @used_input_path
    end

    # Download or fetch the default benchmark file from cache
    # @return [String] CSV file as a string
    def fetch_default_benchmark_csv
      cache_path = CSV_TEST_DATA_CACHE_PATH

      # File.exists? was deprecated and then removed in Ruby 3.2
      if File.exist?(cache_path)
        writer.puts "Cache file found at #{cache_path}.", verbose: true
        @used_input_path = cache_path
        return File.read(cache_path)
      end

      writer.print 'Downloading test data file from GitHub..', verbose: true
      download(CSV_TEST_DATA_URL).tap do |csv|
        @used_input_path = CSV_TEST_DATA_URL
        writer.puts 'done!', verbose: true
        File.write(cache_path, csv)
        writer.puts "Wrote cache file to #{cache_path}..", verbose: true
      end
    end

    # Parse command line arguments and return options
    # @param [Array<String>] argv the command line arguments (parsed options are removed from it)
    # @return [Hash] the command line options
    # @raise [ArgumentError] if a numeric option is invalid
    def parse_options(argv:)
      input_path = nil
      benchmark_time = 30
      benchmark_warmup = 5
      lines_multipliers = [1]
      verbose = false

      OptionParser.new do |parser|
        parser.banner = "Usage: bin/benchmark [file.csv] [options]"
        parser.default_argv = argv

        parser.on("--csv=[file1.csv]", String, "CSV file(s)") do |value|
          input_path = value
        end

        parser.on("--[no-]verbose", "Verbose output") do |value|
          verbose = value
        end

        parser.on("--lines-multipliers=[1,10,50]", Array, "Multiply the rows in the CSV file (default: 1)") do |value|
          # The argument is optional, keep the default when it is omitted
          next if value.nil?

          lines_multipliers = value.map do |v|
            Integer(v).tap do |int|
              unless int >= 1
                raise(ArgumentError, '--lines-multipliers must be 1 or greater')
              end
            end
          end
        end

        parser.on("--time=[30]", String, "Benchmark time (default: 30)") do |value|
          # The argument is optional, keep the default when it is omitted
          benchmark_time = Integer(value) if value
        end

        parser.on("--warmup=[5]", String, "Benchmark warmup (default: 5)") do |value|
          # The argument is optional, keep the default when it is omitted
          benchmark_warmup = Integer(value) if value
        end

        # No argument, shows at tail. This will print an options summary.
        parser.on_tail("-h", "--help", "Show this message") do
          puts parser
          exit
        end
      end.parse!(argv) # parse the given arguments, not unconditionally ARGV

      {
        input_path: input_path,
        benchmark_time: benchmark_time,
        benchmark_warmup: benchmark_warmup,
        lines_multipliers: lines_multipliers,
        verbose: verbose,
      }
    end

    private

    # Download the body found at the given URL
    # @param [String] url to download
    # @return [String] the response body
    def download(url)
      require 'open-uri'
      # Kernel#open no longer opens URLs on Ruby 3.0+, URI.open replaces it
      # (available since Ruby 2.5), fall back to Kernel#open on older Rubies
      io = URI.respond_to?(:open) ? URI.open(url) : open(url)
      io.read
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module HoneyFormat
  # Main CLI
  # @attr_reader [Hash] options from command line arguments
  class CLI
    attr_reader :options

    # Instantiate the CLI, the options are parsed from ARGV
    # @return [CLI] the CLI
    def initialize
      @options = parse_options(argv: ARGV)
    end

    # Parse command line arguments and return options
    # @param [Array<String>] argv the command line arguments (parsed options are removed from it)
    # @return [Hash] the command line options
    def parse_options(argv:)
      input_path = nil
      columns = nil
      output_path = nil
      delimiter = ','
      header_only = false
      rows_only = false

      OptionParser.new do |parser|
        parser.banner = "Usage: honey_format [file.csv] [options]"
        parser.default_argv = argv

        parser.on("--csv=input.csv", String, "CSV file") do |value|
          input_path = value
        end

        parser.on("--columns=id,name", Array, "Select columns") do |value|
          columns = value&.map(&:to_sym)
        end

        parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
          output_path = value
        end

        parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
          delimiter = value
        end

        parser.on("--[no-]header-only", "Print only the header") do |value|
          header_only = value
        end

        parser.on("--[no-]rows-only", "Print only the rows") do |value|
          rows_only = value
        end

        parser.on_tail('--version', 'Show version') do
          puts "HoneyFormat version #{HoneyFormat::VERSION}"
          exit
        end

        # No argument, shows at tail. This will print an options summary.
        parser.on_tail("-h", "--help", "Show this message") do
          puts parser
          exit
        end
      end.parse!(argv) # parse the given arguments, not unconditionally ARGV

      {
        # --csv takes precedence, otherwise use the first argument left after
        # the options have been removed, so an option is never taken as the file
        input_path: input_path || argv.first,
        columns: columns,
        output_path: output_path,
        delimiter: delimiter,
        header_only: header_only,
        rows_only: rows_only,
      }
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module HoneyFormat
  # CLI result writer handles command output
  # @attr [true, false] verbose the writer mode
  class CLIResultWriter
    attr_accessor :verbose

    # Instantiate the result writer
    # @param verbose [true, false] mode (default: false)
    # @return [CLIResultWriter] the result writer
    def initialize(verbose: false)
      @verbose = verbose
    end

    # Return if verbose mode is true/false
    # @return [true, false]
    def verbose?
      @verbose
    end

    # Print the string
    # @param [String] string to print
    # @param verbose [true, false] only print when the writer is in verbose mode (default: false)
    def print(string, verbose: false)
      return if suppress?(verbose)

      Kernel.print(string)
    end

    # Puts the string
    # @param [String] string to puts
    # @param verbose [true, false] only puts when the writer is in verbose mode (default: false)
    def puts(string, verbose: false)
      return if suppress?(verbose)

      Kernel.puts(string)
    end

    private

    # Verbose-only output is dropped unless the writer is in verbose mode
    # @param verbose_only [true, false] whether the output is verbose-only
    # @return [true, false] true if the output should be dropped
    def suppress?(verbose_only)
      verbose_only && !verbose?
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module HoneyFormat
  # Holds HoneyFormat configuration
  # @attr_reader [#call] header_converter the configured header converter
  # @attr [#call] converter the configured value converter
  class Configuration
    attr_accessor :converter
    # Reader only, the writer is defined by hand below (an attr_accessor here
    # would be redefined and cause a "method redefined" warning)
    attr_reader :header_converter

    # Instantiate configuration
    def initialize
      @converter = ValueConverter.new
      @header_converter = @converter[:header_column]
    end

    # Set the header converter
    # @param [Symbol, #call] converter for registered value converter or object that responds to #call
    # @return [#call] the header converter
    def header_converter=(converter)
      # A symbol is looked up in the value converter, anything else is used as is
      @header_converter = converter.is_a?(Symbol) ? @converter[converter] : converter
    end
  end
end
|
data/lib/honey_format/csv.rb
CHANGED
@@ -1,72 +1,69 @@
|
|
1
1
|
require 'csv'
|
2
2
|
|
3
|
-
require 'honey_format/
|
4
|
-
require 'honey_format/
|
3
|
+
require 'honey_format/matrix'
|
4
|
+
# require 'honey_format/rows'
|
5
|
+
# require 'honey_format/header'
|
5
6
|
|
6
7
|
module HoneyFormat
|
7
8
|
# Represents CSV.
|
8
|
-
class CSV
|
9
|
+
class CSV < Matrix
|
9
10
|
# Instantiate CSV.
|
10
11
|
# @return [CSV] a new instance of CSV.
|
11
12
|
# @param [String] csv the CSV string
|
12
|
-
# @param [String] delimiter the CSV delimiter
|
13
|
+
# @param [String] delimiter the CSV column delimiter
|
14
|
+
# @param [String, Symbol] row_delimiter the CSV row delimiter (default: :auto)
|
15
|
+
# @param [String] quote_character the CSV quote character (default: ")
|
13
16
|
# @param [Array<String>] header optional argument that represents CSV header, required if the CSV file lacks a header row.
|
14
|
-
# @param [Array<Symbol>] valid_columns array of symbols representing valid columns, if empty all will be considered valid.
|
15
17
|
# @param [#call] header_converter converts header columns.
|
16
18
|
# @param [#call] row_builder will be called for each parsed row.
|
19
|
+
# @param type_map [Hash] map of column_name => type conversion to perform.
|
17
20
|
# @raise [HeaderError] super class of errors raised when there is a CSV header error.
|
18
21
|
# @raise [MissingHeaderError] raised when header is missing (empty or nil).
|
19
22
|
# @raise [MissingHeaderColumnError] raised when header column is missing.
|
20
|
-
# @raise [UnknownHeaderColumnError] raised when column is not in valid list.
|
21
23
|
# @raise [RowError] super class of errors raised when there is a row error.
|
22
24
|
# @raise [EmptyRowColumnsError] raised when row columns are empty.
|
23
25
|
# @raise [InvalidRowLengthError] raised when row has more columns than header columns.
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
#
|
32
|
-
# @
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
#
|
38
|
-
#
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
# csv.to_csv { |row| row.country == 'Sweden' }
|
66
|
-
# @example with both selected columns and rows
|
67
|
-
# csv.to_csv(columns: [:id, :country]) { |row| row.country == 'Sweden' }
|
68
|
-
def to_csv(columns: nil, &block)
|
69
|
-
@header.to_csv(columns: columns) + @rows.to_csv(columns: columns, &block)
|
26
|
+
# @example
|
27
|
+
# csv = HoneyFormat::CSV.new(csv_string)
|
28
|
+
# @example With custom delimiter
|
29
|
+
# csv = HoneyFormat::CSV.new(csv_string, delimiter: ';')
|
30
|
+
# @example With custom header converter
|
31
|
+
# converter = proc { |v| v == 'name' ? 'first_name' : v }
|
32
|
+
# csv = HoneyFormat::CSV.new("name,id", header_converter: converter)
|
33
|
+
# csv.columns # => [:first_name, :id]
|
34
|
+
# @example Handle errors
|
35
|
+
# begin
|
36
|
+
# csv = HoneyFormat::CSV.new(csv_string)
|
37
|
+
# rescue HoneyFormat::HeaderError => e
|
38
|
+
# puts "header error: #{e.class}, #{e.message}"
|
39
|
+
# rescue HoneyFormat::RowError => e
|
40
|
+
# puts "row error: #{e.class}, #{e.message}"
|
41
|
+
# end
|
42
|
+
# @see Matrix#new
|
43
|
+
def initialize(
|
44
|
+
csv,
|
45
|
+
delimiter: ',',
|
46
|
+
row_delimiter: :auto,
|
47
|
+
quote_character: '"',
|
48
|
+
header: nil,
|
49
|
+
header_converter: HoneyFormat.header_converter,
|
50
|
+
row_builder: nil,
|
51
|
+
type_map: {}
|
52
|
+
)
|
53
|
+
csv = ::CSV.parse(
|
54
|
+
csv,
|
55
|
+
col_sep: delimiter,
|
56
|
+
row_sep: row_delimiter,
|
57
|
+
quote_char: quote_character,
|
58
|
+
skip_blanks: true
|
59
|
+
)
|
60
|
+
super(
|
61
|
+
csv,
|
62
|
+
header: header,
|
63
|
+
header_converter: header_converter,
|
64
|
+
row_builder: row_builder,
|
65
|
+
type_map: type_map
|
66
|
+
)
|
70
67
|
end
|
71
68
|
end
|
72
69
|
end
|