honey_format 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/exe/honey_format CHANGED
@@ -4,65 +4,26 @@
4
4
  require 'bundler/setup' if ENV['HONEY_FORMAT_GEM_DEV']
5
5
  require 'honey_format'
6
6
 
7
- input_path = ARGV.first
8
-
9
- columns = nil
10
- output_path = nil
11
- delimiter = ','
12
- header_only = false
13
-
14
- require 'optparse'
15
-
16
- OptionParser.new do |parser|
17
- parser.banner = "Usage: honey_format [file.csv] [options]"
18
- parser.default_argv = ARGV
19
-
20
- parser.on("--csv=input.csv", String, "CSV file") do |value|
21
- input_path = value
22
- end
23
-
24
- parser.on("--[no-]only-header=[output.csv]", "Print only the header") do |value|
25
- header_only = value
26
- end
27
-
28
- parser.on("--columns=id,name", Array, "Select columns.") do |value|
29
- columns = value
30
- end
31
-
32
- parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
33
- output_path = value
34
- end
35
-
36
- parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
37
- delimiter = value
38
- end
39
-
40
- parser.on("-h", "--help", "How to use") do
41
- puts parser
42
- exit
43
- end
44
-
45
- parser.on_tail('--version', 'Show version') do
46
- puts "HoneyFormat version #{HoneyFormat::VERSION}"
47
- exit
48
- end
49
-
50
- # No argument, shows at tail. This will print an options summary.
51
- parser.on_tail("-h", "--help", "Show this message") do
52
- puts parser
53
- exit
54
- end
55
- end.parse!
56
-
57
- csv = HoneyFormat::CSV.new(File.read(input_path), delimiter: delimiter)
58
- if header_only
59
- puts csv.columns.join(',')
60
- exit
61
- end
62
-
63
- csv_string = csv.to_csv(columns: columns&.map(&:to_sym))
64
- if output_path
65
- File.write(output_path, csv_string)
7
+ require 'honey_format/cli/cli'
8
+
9
+ cli = HoneyFormat::CLI.new
10
+ options = cli.options
11
+
12
+ input_path = options[:input_path] || raise(ArgumentError, 'input path required')
13
+ csv_input = File.read(input_path)
14
+ csv = HoneyFormat::CSV.new(csv_input, delimiter: options[:delimiter])
15
+
16
+ csv_part = if options[:header_only]
17
+ csv.header
18
+ elsif options[:rows_only]
19
+ csv.rows
20
+ else
21
+ csv
22
+ end
23
+
24
+ csv_string = csv_part.to_csv(columns: options[:columns])
25
+ if path = options[:output_path]
26
+ File.write(path, csv_string)
66
27
  else
67
28
  puts csv_string
68
29
  end
data/honey_format.gemspec CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ['Jacob Burenstam']
10
10
  spec.email = ['burenstam@gmail.com']
11
11
 
12
- spec.summary = 'Convert CSV to objects.'
13
- spec.description = 'Convert CSV to an array of objects with with ease. Create objects for each row with methods matching the column names. No dependencies other than Ruby stdlib.'
12
+ spec.summary = 'Makes working with CSVs as smooth as honey.'
13
+ spec.description = 'Proper objects for CSV headers and rows, convert column values, filter columns and rows, small(-ish) perfomance overhead, no dependencies other than Ruby stdlib.'
14
14
  spec.homepage = 'https://github.com/buren/honey_format'
15
15
  spec.license = 'MIT'
16
16
 
data/lib/honey_format.rb CHANGED
@@ -1,9 +1,40 @@
1
1
  require 'honey_format/version'
2
+ require 'honey_format/configuration'
2
3
  require 'honey_format/errors'
4
+ require 'honey_format/value_converter'
3
5
  require 'honey_format/csv'
4
6
 
7
+
5
8
  # Main module for HoneyFormat
6
9
  module HoneyFormat
7
10
  # CSV alias
8
11
  HoneyCSV = CSV
12
+
13
+ # Configure HoneyFormat
14
+ # @yield [configuration] the configuration
15
+ # @yieldparam [Configuration] configuration the current configuration
16
+ # @return [Configuration] current configuration
17
+ def self.configure
18
+ @configuration ||= Configuration.new
19
+ yield(@configuration) if block_given?
20
+ @configuration
21
+ end
22
+
23
+ # Returns the current configuration
24
+ # @return [Configuration] current configuration
25
+ def self.config
26
+ configure
27
+ end
28
+
29
+ # Returns the configured header converter
30
+ # @return [#call] the current header converter
31
+ def self.header_converter
32
+ config.header_converter
33
+ end
34
+
35
+ # Returns the configured value converter
36
+ # @return [#call] the current value converter
37
+ def self.value_converter
38
+ config.converter
39
+ end
9
40
  end
@@ -0,0 +1,124 @@
1
+ require 'optparse'
2
+ require 'honey_format/cli/result_writer'
3
+
4
+ module HoneyFormat
5
+ # Benchmark CLI
6
+ # @attr_reader [Hash] options from command line arguments
7
+ # @attr_reader [CLIResultWriter] writer the CLI result writer
8
+ class BenchmarkCLI
9
+ # CSV default test data location
10
+ CSV_TEST_DATA_URL = 'https://gist.github.com/buren/b669dd82fa37e37672da2cab33c8a830/raw/54ba14a698941ff61f3b854b66df0a7782c79c85/csv_1000_rows.csv'
11
+ # CSV default test data cache location
12
+ CSV_TEST_DATA_CACHE_PATH = '/tmp/honey-format-benchmark-test.csv'
13
+
14
+ attr_reader :writer, :options
15
+
16
+ # Instantiate the CLI
17
+ # @param writer [CLIResultWriter] the result writer to use
18
+ def initialize(writer: CLIResultWriter.new)
19
+ @used_input_path = nil
20
+ @writer = writer
21
+ @options = parse_options(argv: ARGV)
22
+ writer.verbose = true if @options[:verbose]
23
+ end
24
+
25
+ # Returns the expected runtime in seconds
26
+ # @param report_count [Integer] number of reports in benchmark
27
+ # @return [Integer] expected runtime in seconds
28
+ def expected_runtime_seconds(report_count:)
29
+ runs = report_count * options[:lines_multipliers].length
30
+ warmup_time_seconds = runs * options[:benchmark_warmup]
31
+ bench_time_seconds = runs * options[:benchmark_time]
32
+
33
+ warmup_time_seconds + bench_time_seconds
34
+ end
35
+
36
+ # Return the input path used for the benchmark
37
+ # @return [String] the input path (URL or filepath)
38
+ def used_input_path
39
+ options[:input_path] || @used_input_path
40
+ end
41
+
42
+ # Download or fetch the default benchmark file from cache
43
+ # @return [String] CSV file as a string
44
+ def fetch_default_benchmark_csv
45
+ cache_path = CSV_TEST_DATA_CACHE_PATH
46
+
47
+ if File.exists?(cache_path)
48
+ writer.puts "Cache file found at #{cache_path}.", verbose: true
49
+ @used_input_path = cache_path
50
+ return File.read(cache_path)
51
+ end
52
+
53
+ writer.print 'Downloading test data file from GitHub..', verbose: true
54
+ require 'open-uri'
55
+ open(CSV_TEST_DATA_URL).read.tap do |csv|
56
+ @used_input_path = CSV_TEST_DATA_URL
57
+ writer.puts 'done!', verbose: true
58
+ File.write(cache_path, csv)
59
+ writer.puts "Wrote cache file to #{cache_path}..", verbose: true
60
+ end
61
+ end
62
+
63
+ # Parse command line arguments and return options
64
+ # @param [Array<String>] argv the command line arguments
65
+ # @return [Hash] the command line options
66
+ def parse_options(argv:)
67
+ input_path = nil
68
+ benchmark_time = 30
69
+ benchmark_warmup = 5
70
+ lines_multipliers = [1]
71
+ verbose = false
72
+
73
+ OptionParser.new do |parser|
74
+ parser.banner = "Usage: bin/benchmark [file.csv] [options]"
75
+ parser.default_argv = ARGV
76
+
77
+ parser.on("--csv=[file1.csv]", String, "CSV file(s)") do |value|
78
+ input_path = value
79
+ end
80
+
81
+ parser.on("--[no-]verbose", "Verbose output") do |value|
82
+ verbose = value
83
+ end
84
+
85
+ parser.on("--lines-multipliers=[1,10,50]", Array, "Multiply the rows in the CSV file (default: 1)") do |value|
86
+ lines_multipliers = value.map do |v|
87
+ Integer(v).tap do |int|
88
+ unless int >= 1
89
+ raise(ArgumentError, '--lines-multiplier must be 1 or greater')
90
+ end
91
+ end
92
+ end
93
+ end
94
+
95
+ parser.on("--time=[30]", String, "Benchmark time (default: 30)") do |value|
96
+ benchmark_time = Integer(value)
97
+ end
98
+
99
+ parser.on("--warmup=[30]", String, "Benchmark warmup (default: 30)") do |value|
100
+ benchmark_warmup = Integer(value)
101
+ end
102
+
103
+ parser.on("-h", "--help", "How to use") do
104
+ puts parser
105
+ exit
106
+ end
107
+
108
+ # No argument, shows at tail. This will print an options summary.
109
+ parser.on_tail("-h", "--help", "Show this message") do
110
+ puts parser
111
+ exit
112
+ end
113
+ end.parse!
114
+
115
+ {
116
+ input_path: input_path,
117
+ benchmark_time: benchmark_time,
118
+ benchmark_warmup: benchmark_warmup,
119
+ lines_multipliers: lines_multipliers,
120
+ verbose: verbose,
121
+ }
122
+ end
123
+ end
124
+ end
@@ -0,0 +1,81 @@
1
+ require 'optparse'
2
+
3
+ module HoneyFormat
4
+ # Main CLI
5
+ # @attr_reader [Hash] options from command line arguments
6
+ class CLI
7
+ attr_reader :options
8
+
9
+ # Instantiate the CLI
10
+ # @return [CLI] the CLI
11
+ def initialize
12
+ @options = parse_options(argv: ARGV)
13
+ end
14
+
15
+ # Parse command line arguments and return options
16
+ # @param [Array<String>] argv the command line arguments
17
+ # @return [Hash] the command line options
18
+ def parse_options(argv:)
19
+ input_path = argv.first
20
+ columns = nil
21
+ output_path = nil
22
+ delimiter = ','
23
+ header_only = false
24
+ rows_only = false
25
+
26
+ OptionParser.new do |parser|
27
+ parser.banner = "Usage: honey_format [file.csv] [options]"
28
+ parser.default_argv = ARGV
29
+
30
+ parser.on("--csv=input.csv", String, "CSV file") do |value|
31
+ input_path = value
32
+ end
33
+
34
+ parser.on("--columns=id,name", Array, "Select columns") do |value|
35
+ columns = value&.map(&:to_sym)
36
+ end
37
+
38
+ parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
39
+ output_path = value
40
+ end
41
+
42
+ parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
43
+ delimiter = value
44
+ end
45
+
46
+ parser.on("--[no-]header-only", "Print only the header") do |value|
47
+ header_only = value
48
+ end
49
+
50
+ parser.on("--[no-]rows-only", "Print only the rows") do |value|
51
+ rows_only = value
52
+ end
53
+
54
+ parser.on("-h", "--help", "How to use") do
55
+ puts parser
56
+ exit
57
+ end
58
+
59
+ parser.on_tail('--version', 'Show version') do
60
+ puts "HoneyFormat version #{HoneyFormat::VERSION}"
61
+ exit
62
+ end
63
+
64
+ # No argument, shows at tail. This will print an options summary.
65
+ parser.on_tail("-h", "--help", "Show this message") do
66
+ puts parser
67
+ exit
68
+ end
69
+ end.parse!
70
+
71
+ {
72
+ input_path: input_path,
73
+ columns: columns,
74
+ output_path: output_path,
75
+ delimiter: delimiter,
76
+ header_only: header_only,
77
+ rows_only: rows_only,
78
+ }
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,38 @@
1
+ module HoneyFormat
2
+ # CLI result writer handles command output
3
+ # @attr_reader [true, false] verbose the writer mode
4
+ class CLIResultWriter
5
+ attr_accessor :verbose
6
+
7
+ # Instantiate the result writer
8
+ # @param verbose [true, false] mode (default: false)
9
+ # @return [CLIResultWriter] the result writer
10
+ def initialize(verbose: false)
11
+ @verbose = verbose
12
+ end
13
+
14
+ # Return if verbose mode is true/false
15
+ # @return [true, false]
16
+ def verbose?
17
+ @verbose
18
+ end
19
+
20
+ # Print the string
21
+ # @param [String] string to print
22
+ # @param verbose [true, false] mode (default: false)
23
+ def print(string, verbose: false)
24
+ return if !verbose? && verbose
25
+
26
+ Kernel.print(string)
27
+ end
28
+
29
+ # Puts the string
30
+ # @param [String] string to puts
31
+ # @param verbose [true, false] mode (default: false)
32
+ def puts(string, verbose: false)
33
+ return if !verbose? && verbose
34
+
35
+ Kernel.puts(string)
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,26 @@
1
+ module HoneyFormat
2
+ # Holds HoneyFormat configuration
3
+ # @attr_reader [#call] header_converter the configured header converter
4
+ # @attr_reader [#call] converter the configured value converter
5
+ # @attr_writer [#call] header_converter to use
6
+ # @attr_writer [#call] converter the value converter to use
7
+ class Configuration
8
+ attr_accessor :header_converter, :converter
9
+
10
+ # Instantiate configuration
11
+ def initialize
12
+ @converter = ValueConverter.new
13
+ @header_converter = @converter[:header_column]
14
+ end
15
+
16
+ # Set the header converter
17
+ # @param [Symbol, #call] converter for registered value converter or object that responds to #call
18
+ # @return [#call] the header converter
19
+ def header_converter=(converter)
20
+ if converter.is_a?(Symbol)
21
+ return @header_converter = @converter[converter]
22
+ end
23
+ @header_converter = converter
24
+ end
25
+ end
26
+ end
@@ -1,72 +1,69 @@
1
1
  require 'csv'
2
2
 
3
- require 'honey_format/rows'
4
- require 'honey_format/header'
3
+ require 'honey_format/matrix'
4
+ # require 'honey_format/rows'
5
+ # require 'honey_format/header'
5
6
 
6
7
  module HoneyFormat
7
8
  # Represents CSV.
8
- class CSV
9
+ class CSV < Matrix
9
10
  # Instantiate CSV.
10
11
  # @return [CSV] a new instance of CSV.
11
12
  # @param [String] csv the CSV string
12
- # @param [String] delimiter the CSV delimiter
13
+ # @param [String] delimiter the CSV column delimiter
14
+ # @param [String, Symbol] row_delimiter the CSV row delimiter (default: :auto)
15
+ # @param [String] quote_character the CSV quote character (default: ")
13
16
  # @param [Array<String>] header optional argument that represents CSV header, required if the CSV file lacks a header row.
14
- # @param [Array<Symbol>] valid_columns array of symbols representing valid columns, if empty all will be considered valid.
15
17
  # @param [#call] header_converter converts header columns.
16
18
  # @param [#call] row_builder will be called for each parsed row.
19
+ # @param type_map [Hash] map of column_name => type conversion to perform.
17
20
  # @raise [HeaderError] super class of errors raised when there is a CSV header error.
18
21
  # @raise [MissingHeaderError] raised when header is missing (empty or nil).
19
22
  # @raise [MissingHeaderColumnError] raised when header column is missing.
20
- # @raise [UnknownHeaderColumnError] raised when column is not in valid list.
21
23
  # @raise [RowError] super class of errors raised when there is a row error.
22
24
  # @raise [EmptyRowColumnsError] raised when row columns are empty.
23
25
  # @raise [InvalidRowLengthError] raised when row has more columns than header columns.
24
- def initialize(csv, delimiter: ',', header: nil, valid_columns: [], header_converter: ConvertHeaderValue, row_builder: nil)
25
- csv = ::CSV.parse(csv, col_sep: delimiter)
26
- header_row = header || csv.shift
27
- @header = Header.new(header_row, valid: valid_columns, converter: header_converter)
28
- @rows = Rows.new(csv, columns, builder: row_builder)
29
- end
30
-
31
- # Original CSV header
32
- # @return [Array<String>] of strings for sanitized header.
33
- def header
34
- @header.original
35
- end
36
-
37
- # CSV columns converted from the original CSV header
38
- # @return [Array<Symbol>] of column identifiers.
39
- def columns
40
- @header.to_a
41
- end
42
-
43
- # @return [Array] of rows.
44
- def rows
45
- @rows
46
- end
47
-
48
- # @yield [row] The given block will be passed for every row.
49
- # @yieldparam [Row] row in the CSV.
50
- # @return [Enumerator] If no block is given, an enumerator object will be returned.
51
- def each_row
52
- return rows.each unless block_given?
53
-
54
- rows.each { |row| yield(row) }
55
- end
56
-
57
- # Convert CSV object as CSV-string.
58
- # @param columns [Array<Symbol>, Set<Symbol>, NilClass] the columns to output, nil means all columns (default: nil)
59
- # @yield [row] The given block will be passed for every row - return truthy if you want the row to be included in the output
60
- # @yieldparam [Row] row
61
- # @return [String] CSV-string representation.
62
- # @example with selected columns
63
- # csv.to_csv(columns: [:id, :country])
64
- # @example with selected rows
65
- # csv.to_csv { |row| row.country == 'Sweden' }
66
- # @example with both selected columns and rows
67
- # csv.to_csv(columns: [:id, :country]) { |row| row.country == 'Sweden' }
68
- def to_csv(columns: nil, &block)
69
- @header.to_csv(columns: columns) + @rows.to_csv(columns: columns, &block)
26
+ # @example
27
+ # csv = HoneyFormat::CSV.new(csv_string)
28
+ # @example With custom delimiter
29
+ # csv = HoneyFormat::CSV.new(csv_string, delimiter: ';')
30
+ # @example With custom header converter
31
+ # converter = proc { |v| v == 'name' ? 'first_name' : v }
32
+ # csv = HoneyFormat::CSV.new("name,id", header_converter: converter)
33
+ # csv.columns # => [:first_name, :id]
34
+ # @example Handle errors
35
+ # begin
36
+ # csv = HoneyFormat::CSV.new(csv_string)
37
+ # rescue HoneyFormat::HeaderError => e
38
+ # puts "header error: #{e.class}, #{e.message}"
39
+ # rescue HoneyFormat::RowError => e
40
+ # puts "row error: #{e.class}, #{e.message}"
41
+ # end
42
+ # @see Matrix#new
43
+ def initialize(
44
+ csv,
45
+ delimiter: ',',
46
+ row_delimiter: :auto,
47
+ quote_character: '"',
48
+ header: nil,
49
+ header_converter: HoneyFormat.header_converter,
50
+ row_builder: nil,
51
+ type_map: {}
52
+ )
53
+ csv = ::CSV.parse(
54
+ csv,
55
+ col_sep: delimiter,
56
+ row_sep: row_delimiter,
57
+ quote_char: quote_character,
58
+ skip_blanks: true
59
+ )
60
+ super(
61
+ csv,
62
+ header: header,
63
+ header_converter: header_converter,
64
+ row_builder: row_builder,
65
+ type_map: type_map
66
+ )
70
67
  end
71
68
  end
72
69
  end