honey_format 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
data/exe/honey_format CHANGED
@@ -4,65 +4,26 @@
4
4
  require 'bundler/setup' if ENV['HONEY_FORMAT_GEM_DEV']
5
5
  require 'honey_format'
6
6
 
7
- input_path = ARGV.first
8
-
9
- columns = nil
10
- output_path = nil
11
- delimiter = ','
12
- header_only = false
13
-
14
- require 'optparse'
15
-
16
- OptionParser.new do |parser|
17
- parser.banner = "Usage: honey_format [file.csv] [options]"
18
- parser.default_argv = ARGV
19
-
20
- parser.on("--csv=input.csv", String, "CSV file") do |value|
21
- input_path = value
22
- end
23
-
24
- parser.on("--[no-]only-header=[output.csv]", "Print only the header") do |value|
25
- header_only = value
26
- end
27
-
28
- parser.on("--columns=id,name", Array, "Select columns.") do |value|
29
- columns = value
30
- end
31
-
32
- parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
33
- output_path = value
34
- end
35
-
36
- parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
37
- delimiter = value
38
- end
39
-
40
- parser.on("-h", "--help", "How to use") do
41
- puts parser
42
- exit
43
- end
44
-
45
- parser.on_tail('--version', 'Show version') do
46
- puts "HoneyFormat version #{HoneyFormat::VERSION}"
47
- exit
48
- end
49
-
50
- # No argument, shows at tail. This will print an options summary.
51
- parser.on_tail("-h", "--help", "Show this message") do
52
- puts parser
53
- exit
54
- end
55
- end.parse!
56
-
57
- csv = HoneyFormat::CSV.new(File.read(input_path), delimiter: delimiter)
58
- if header_only
59
- puts csv.columns.join(',')
60
- exit
61
- end
62
-
63
- csv_string = csv.to_csv(columns: columns&.map(&:to_sym))
64
- if output_path
65
- File.write(output_path, csv_string)
7
+ require 'honey_format/cli/cli'
8
+
9
+ cli = HoneyFormat::CLI.new
10
+ options = cli.options
11
+
12
+ input_path = options[:input_path] || raise(ArgumentError, 'input path required')
13
+ csv_input = File.read(input_path)
14
+ csv = HoneyFormat::CSV.new(csv_input, delimiter: options[:delimiter])
15
+
16
+ csv_part = if options[:header_only]
17
+ csv.header
18
+ elsif options[:rows_only]
19
+ csv.rows
20
+ else
21
+ csv
22
+ end
23
+
24
+ csv_string = csv_part.to_csv(columns: options[:columns])
25
+ if path = options[:output_path]
26
+ File.write(path, csv_string)
66
27
  else
67
28
  puts csv_string
68
29
  end
data/honey_format.gemspec CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ['Jacob Burenstam']
10
10
  spec.email = ['burenstam@gmail.com']
11
11
 
12
- spec.summary = 'Convert CSV to objects.'
13
- spec.description = 'Convert CSV to an array of objects with with ease. Create objects for each row with methods matching the column names. No dependencies other than Ruby stdlib.'
12
+ spec.summary = 'Makes working with CSVs as smooth as honey.'
13
+ spec.description = 'Proper objects for CSV headers and rows, convert column values, filter columns and rows, small(-ish) perfomance overhead, no dependencies other than Ruby stdlib.'
14
14
  spec.homepage = 'https://github.com/buren/honey_format'
15
15
  spec.license = 'MIT'
16
16
 
data/lib/honey_format.rb CHANGED
@@ -1,9 +1,40 @@
1
1
  require 'honey_format/version'
2
+ require 'honey_format/configuration'
2
3
  require 'honey_format/errors'
4
+ require 'honey_format/value_converter'
3
5
  require 'honey_format/csv'
4
6
 
7
+
5
8
  # Main module for HoneyFormat
6
9
  module HoneyFormat
7
10
  # CSV alias
8
11
  HoneyCSV = CSV
12
+
13
+ # Configure HoneyFormat
14
+ # @yield [configuration] the configuration
15
+ # @yieldparam [Configuration] current configuration
16
+ # @return [Configuration] current configuration
17
+ def self.configure
18
+ @configuration ||= Configuration.new
19
+ yield(@configuration) if block_given?
20
+ @configuration
21
+ end
22
+
23
+ # Returns the current configuration
24
+ # @return [Configuration] current configuration
25
+ def self.config
26
+ configure
27
+ end
28
+
29
+ # Returns the configured header converter
30
+ # @return [#call] the current header converter
31
+ def self.header_converter
32
+ config.header_converter
33
+ end
34
+
35
+ # Returns the configured value converter
36
+ # @return [#call] the current value converter
37
+ def self.value_converter
38
+ config.converter
39
+ end
9
40
  end
@@ -0,0 +1,124 @@
1
+ require 'optparse'
2
+ require 'honey_format/cli/result_writer'
3
+
4
module HoneyFormat
  # Benchmark CLI, parses the benchmark command line options and provides
  # the CSV data to run the benchmark against.
  # @attr_reader [Hash] options from command line arguments
  # @attr_reader [CLIResultWriter] writer the CLI result writer
  class BenchmarkCLI
    # CSV default test data location
    CSV_TEST_DATA_URL = 'https://gist.github.com/buren/b669dd82fa37e37672da2cab33c8a830/raw/54ba14a698941ff61f3b854b66df0a7782c79c85/csv_1000_rows.csv'
    # CSV default test data cache location
    CSV_TEST_DATA_CACHE_PATH = '/tmp/honey-format-benchmark-test.csv'

    attr_reader :writer, :options

    # Instantiate the CLI, the options are parsed from ARGV
    # @param writer [CLIResultWriter] the result writer to use
    def initialize(writer: CLIResultWriter.new)
      @used_input_path = nil
      @writer = writer
      @options = parse_options(argv: ARGV)
      writer.verbose = true if @options[:verbose]
    end

    # Returns the expected runtime in seconds
    # @param report_count [Integer] number of reports in benchmark
    # @return [Integer] expected runtime in seconds
    def expected_runtime_seconds(report_count:)
      runs = report_count * options[:lines_multipliers].length
      warmup_time_seconds = runs * options[:benchmark_warmup]
      bench_time_seconds = runs * options[:benchmark_time]

      warmup_time_seconds + bench_time_seconds
    end

    # Return the input path used for the benchmark
    # @return [String, nil] the input path (URL or filepath), nil if none has been used yet
    def used_input_path
      options[:input_path] || @used_input_path
    end

    # Download or fetch the default benchmark file from cache
    # @return [String] CSV file as a string
    def fetch_default_benchmark_csv
      cache_path = CSV_TEST_DATA_CACHE_PATH

      # File.exists? is deprecated and has been removed in Ruby 3.2
      if File.exist?(cache_path)
        writer.puts "Cache file found at #{cache_path}.", verbose: true
        @used_input_path = cache_path
        return File.read(cache_path)
      end

      writer.print 'Downloading test data file from GitHub..', verbose: true
      require 'open-uri' # loaded lazily, only needed when there is no cache file
      # Kernel#open no longer opens URLs on Ruby 3.0+, URI#open does on every version;
      # the block form closes the underlying IO once the body has been read.
      URI.parse(CSV_TEST_DATA_URL).open(&:read).tap do |csv|
        @used_input_path = CSV_TEST_DATA_URL
        writer.puts 'done!', verbose: true
        File.write(cache_path, csv)
        writer.puts "Wrote cache file to #{cache_path}..", verbose: true
      end
    end

    # Parse command line arguments and return options
    # @param [Array<String>] argv the command line arguments, parsed options are removed from it
    # @return [Hash] the command line options
    # @raise [ArgumentError] if a numeric option isn't a valid integer or a multiplier is less than 1
    def parse_options(argv:)
      input_path = nil
      benchmark_time = 30
      benchmark_warmup = 5
      lines_multipliers = [1]
      verbose = false

      OptionParser.new do |parser|
        parser.banner = "Usage: bin/benchmark [file.csv] [options]"

        parser.on("--csv=[file1.csv]", String, "CSV file(s)") do |value|
          input_path = value
        end

        parser.on("--[no-]verbose", "Verbose output") do |value|
          verbose = value
        end

        parser.on("--lines-multipliers=[1,10,50]", Array, "Multiply the rows in the CSV file (default: 1)") do |value|
          # The argument is optional, keep the default when it is left out
          unless value.nil? || value.empty?
            lines_multipliers = value.map do |v|
              Integer(v).tap do |int|
                raise(ArgumentError, '--lines-multipliers must be 1 or greater') if int < 1
              end
            end
          end
        end

        parser.on("--time=[30]", String, "Benchmark time (default: 30)") do |value|
          benchmark_time = Integer(value) if value
        end

        parser.on("--warmup=[5]", String, "Benchmark warmup (default: 5)") do |value|
          benchmark_warmup = Integer(value) if value
        end

        # No argument, shows at tail. This will print an options summary.
        parser.on_tail("-h", "--help", "Show this message") do
          puts parser
          exit
        end
      end.parse!(argv) # parse the arguments we were given, not always the global ARGV

      {
        input_path: input_path,
        benchmark_time: benchmark_time,
        benchmark_warmup: benchmark_warmup,
        lines_multipliers: lines_multipliers,
        verbose: verbose,
      }
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'optparse'
2
+
3
module HoneyFormat
  # Main CLI, parses the honey_format command line options
  # @attr_reader [Hash] options from command line arguments
  class CLI
    attr_reader :options

    # Instantiate the CLI, the options are parsed from ARGV
    # @return [CLI] the CLI
    def initialize
      @options = parse_options(argv: ARGV)
    end

    # Parse command line arguments and return options
    # @param [Array<String>] argv the command line arguments, parsed options are removed from it
    # @return [Hash] the command line options
    def parse_options(argv:)
      input_path = nil
      columns = nil
      output_path = nil
      delimiter = ','
      header_only = false
      rows_only = false

      OptionParser.new do |parser|
        parser.banner = "Usage: honey_format [file.csv] [options]"

        parser.on("--csv=input.csv", String, "CSV file") do |value|
          input_path = value
        end

        parser.on("--columns=id,name", Array, "Select columns") do |value|
          columns = value&.map(&:to_sym)
        end

        parser.on("--output=output.csv", String, "CSV output (STDOUT otherwise)") do |value|
          output_path = value
        end

        parser.on("--delimiter=,", String, "CSV delimiter (default: ,)") do |value|
          delimiter = value
        end

        parser.on("--[no-]header-only", "Print only the header") do |value|
          header_only = value
        end

        parser.on("--[no-]rows-only", "Print only the rows") do |value|
          rows_only = value
        end

        parser.on_tail('--version', 'Show version') do
          puts "HoneyFormat version #{HoneyFormat::VERSION}"
          exit
        end

        # No argument, shows at tail. This will print an options summary.
        parser.on_tail("-h", "--help", "Show this message") do
          puts parser
          exit
        end
      end.parse!(argv) # parse the arguments we were given, not always the global ARGV

      {
        # --csv wins, otherwise use the first argument left once the options
        # have been removed, so a leading option is never taken for the file.
        input_path: input_path || argv.first,
        columns: columns,
        output_path: output_path,
        delimiter: delimiter,
        header_only: header_only,
        rows_only: rows_only,
      }
    end
  end
end
@@ -0,0 +1,38 @@
1
module HoneyFormat
  # Writes CLI command output and hides verbose-only messages unless in verbose mode
  # @attr [true, false] verbose the writer mode
  class CLIResultWriter
    attr_accessor :verbose

    # Build a new result writer
    # @param verbose [true, false] mode (default: false)
    # @return [CLIResultWriter] the result writer
    def initialize(verbose: false)
      @verbose = verbose
    end

    # Tells whether the writer is in verbose mode
    # @return [true, false]
    def verbose?
      @verbose
    end

    # Print the string (without trailing newline)
    # @param [String] string to print
    # @param verbose [true, false] true if the message is for verbose mode only (default: false)
    def print(string, verbose: false)
      Kernel.print(string) unless suppress?(verbose)
    end

    # Puts the string
    # @param [String] string to puts
    # @param verbose [true, false] true if the message is for verbose mode only (default: false)
    def puts(string, verbose: false)
      Kernel.puts(string) unless suppress?(verbose)
    end

    private

    # A verbose-only message is dropped when the writer isn't in verbose mode
    def suppress?(verbose_only)
      verbose_only && !verbose?
    end
  end
end
@@ -0,0 +1,26 @@
1
module HoneyFormat
  # Holds HoneyFormat configuration
  # @attr_reader [#call] header_converter the configured header converter
  # @attr [#call] converter the configured value converter
  class Configuration
    # Only the reader is generated for header_converter: the writer is written
    # by hand below, generating it too triggers a "method redefined" warning.
    attr_reader :header_converter
    attr_accessor :converter

    # Instantiate configuration, the header converter defaults to the value
    # converter registered as :header_column
    def initialize
      @converter = ValueConverter.new
      @header_converter = @converter[:header_column]
    end

    # Set the header converter
    # @param [Symbol, #call] converter for registered value converter or object that responds to #call
    # @return [#call] the header converter
    def header_converter=(converter)
      # A Symbol is looked up in the value converter, anything else is used as is
      @header_converter = converter.is_a?(Symbol) ? @converter[converter] : converter
    end
  end
end
@@ -1,72 +1,69 @@
1
1
  require 'csv'
2
2
 
3
- require 'honey_format/rows'
4
- require 'honey_format/header'
3
+ require 'honey_format/matrix'
4
+ # require 'honey_format/rows'
5
+ # require 'honey_format/header'
5
6
 
6
7
  module HoneyFormat
7
8
  # Represents CSV.
8
- class CSV
9
+ class CSV < Matrix
9
10
  # Instantiate CSV.
10
11
  # @return [CSV] a new instance of CSV.
11
12
  # @param [String] csv the CSV string
12
- # @param [String] delimiter the CSV delimiter
13
+ # @param [String] delimiter the CSV column delimiter
14
+ # @param [String, Symbol] row_delimiter the CSV row delimiter (default: :auto)
15
+ # @param [String] quote_character the CSV quote character (default: ")
13
16
  # @param [Array<String>] header optional argument that represents CSV header, required if the CSV file lacks a header row.
14
- # @param [Array<Symbol>] valid_columns array of symbols representing valid columns, if empty all will be considered valid.
15
17
  # @param [#call] header_converter converts header columns.
16
18
  # @param [#call] row_builder will be called for each parsed row.
19
+ # @param type_map [Hash] map of column_name => type conversion to perform.
17
20
  # @raise [HeaderError] super class of errors raised when there is a CSV header error.
18
21
  # @raise [MissingHeaderError] raised when header is missing (empty or nil).
19
22
  # @raise [MissingHeaderColumnError] raised when header column is missing.
20
- # @raise [UnknownHeaderColumnError] raised when column is not in valid list.
21
23
  # @raise [RowError] super class of errors raised when there is a row error.
22
24
  # @raise [EmptyRowColumnsError] raised when row columns are empty.
23
25
  # @raise [InvalidRowLengthError] raised when row has more columns than header columns.
24
- def initialize(csv, delimiter: ',', header: nil, valid_columns: [], header_converter: ConvertHeaderValue, row_builder: nil)
25
- csv = ::CSV.parse(csv, col_sep: delimiter)
26
- header_row = header || csv.shift
27
- @header = Header.new(header_row, valid: valid_columns, converter: header_converter)
28
- @rows = Rows.new(csv, columns, builder: row_builder)
29
- end
30
-
31
- # Original CSV header
32
- # @return [Array<String>] of strings for sanitized header.
33
- def header
34
- @header.original
35
- end
36
-
37
- # CSV columns converted from the original CSV header
38
- # @return [Array<Symbol>] of column identifiers.
39
- def columns
40
- @header.to_a
41
- end
42
-
43
- # @return [Array] of rows.
44
- def rows
45
- @rows
46
- end
47
-
48
- # @yield [row] The given block will be passed for every row.
49
- # @yieldparam [Row] row in the CSV.
50
- # @return [Enumerator] If no block is given, an enumerator object will be returned.
51
- def each_row
52
- return rows.each unless block_given?
53
-
54
- rows.each { |row| yield(row) }
55
- end
56
-
57
- # Convert CSV object as CSV-string.
58
- # @param columns [Array<Symbol>, Set<Symbol>, NilClass] the columns to output, nil means all columns (default: nil)
59
- # @yield [row] The given block will be passed for every row - return truthy if you want the row to be included in the output
60
- # @yieldparam [Row] row
61
- # @return [String] CSV-string representation.
62
- # @example with selected columns
63
- # csv.to_csv(columns: [:id, :country])
64
- # @example with selected rows
65
- # csv.to_csv { |row| row.country == 'Sweden' }
66
- # @example with both selected columns and rows
67
- # csv.to_csv(columns: [:id, :country]) { |row| row.country == 'Sweden' }
68
- def to_csv(columns: nil, &block)
69
- @header.to_csv(columns: columns) + @rows.to_csv(columns: columns, &block)
26
+ # @example
27
+ # csv = HoneyFormat::CSV.new(csv_string)
28
+ # @example With custom delimiter
29
+ # csv = HoneyFormat::CSV.new(csv_string, delimiter: ';')
30
+ # @example With custom header converter
31
+ # converter = proc { |v| v == 'name' ? 'first_name' : v }
32
+ # csv = HoneyFormat::CSV.new("name,id", header_converter: converter)
33
+ # csv.columns # => [:first_name, :id]
34
+ # @example Handle errors
35
+ # begin
36
+ # csv = HoneyFormat::CSV.new(csv_string)
37
+ # rescue HoneyFormat::HeaderError => e
38
+ # puts "header error: #{e.class}, #{e.message}"
39
+ # rescue HoneyFormat::RowError => e
40
+ # puts "row error: #{e.class}, #{e.message}"
41
+ # end
42
+ # @see Matrix#new
43
+ def initialize(
44
+ csv,
45
+ delimiter: ',',
46
+ row_delimiter: :auto,
47
+ quote_character: '"',
48
+ header: nil,
49
+ header_converter: HoneyFormat.header_converter,
50
+ row_builder: nil,
51
+ type_map: {}
52
+ )
53
+ csv = ::CSV.parse(
54
+ csv,
55
+ col_sep: delimiter,
56
+ row_sep: row_delimiter,
57
+ quote_char: quote_character,
58
+ skip_blanks: true
59
+ )
60
+ super(
61
+ csv,
62
+ header: header,
63
+ header_converter: header_converter,
64
+ row_builder: row_builder,
65
+ type_map: type_map
66
+ )
70
67
  end
71
68
  end
72
69
  end