honey_format 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3ba71ccfedf80d0f721f3b205a6eb8f247284704
4
- data.tar.gz: e6091ac68245db3e1c1d0918e9aed66361b38a5e
3
+ metadata.gz: 4a3591ebdea8bbde84b5cd4cef7915917ec9b929
4
+ data.tar.gz: f8ae5d74d90fe3742c322e94f2f5881c879d85bb
5
5
  SHA512:
6
- metadata.gz: 918906a7bdfc3d6ef900794832a8359811ee69ea9c68ab8a563bc5199308db91637fa8610f7b265bc7ddbcad13a3f74b1e6f467df5247eb0ed2d022bd9ce1bf2
7
- data.tar.gz: 4f46d54f77c109349131ea3c3db982e1671aa3597ea1383b3cd7b6176a64d72bb0650fe0b772fa3542883c38761589344c23fb510ec110e21ce15a80d7845819
6
+ metadata.gz: 2821d59e1072042093d590f0a31883dd0403fc552b5ee8f7a5d16f2849afb37a5003dbb37c6054a620ff99b4b97eca1ea69f751118c7f156af96c2849b634367
7
+ data.tar.gz: 17b3aa8e0f0fc46391395ef602cff94753fbd5e6fe96cd28bf19cce859626ebefbf99b1b54460b72ac3d4698c7b71cfe7a459a02ab93d5e89548c058068ba411
data/CHANGELOG.md ADDED
@@ -0,0 +1,8 @@
1
+ # v0.2.0
2
+
3
+ * More explicit exception classes
4
+ * Restructured internals
5
+
6
+ ## v0.1.0
7
+
8
+ Initial release
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # HoneyFormat [![Build Status](https://travis-ci.org/buren/honey_format.svg)](https://travis-ci.org/buren/honey_format) [![Code Climate](https://codeclimate.com/github/buren/honey_format/badges/gpa.svg)](https://codeclimate.com/github/buren/honey_format)
1
+ # HoneyFormat [![Build Status](https://travis-ci.org/buren/honey_format.svg)](https://travis-ci.org/buren/honey_format) [![Code Climate](https://codeclimate.com/github/buren/honey_format/badges/gpa.svg)](https://codeclimate.com/github/buren/honey_format) ![Docs badge](https://inch-ci.org/github/buren/honey_format.svg?branch=master)
2
2
 
3
3
  Convert CSV to object with one command.
4
4
 
@@ -39,11 +39,13 @@ By default assumes a header in the CSV file.
39
39
  csv_string = "Id, Username\n 1, buren"
40
40
  csv = HoneyFormat::CSV.new(csv_string)
41
41
  csv.header # => ["Id", "Username"]
42
+ csv.columns # => [:id, :username]
42
43
 
43
44
  include HoneyFormat
44
45
  # If included you can use the HoneyCSV shorthand
45
46
  csv = HoneyCSV.new(csv_string)
46
- user = csv.rows # => [#<struct id="1", username="buren">]
47
+ rows = csv.rows # => [#<struct id="1", username="buren">]
48
+ user = rows.first
47
49
  user.id # => "1"
48
50
  user.username # => "buren"
49
51
  ```
@@ -54,6 +56,7 @@ csv_string = "Id, Username\n 1, buren"
54
56
  # Invalid
55
57
  HoneyCSV.new(csv_string, valid_columns: [:something, :username])
56
58
  # => #<HoneyFormat::UnknownCSVHeaderColumnError: column :id not in [:something, :username]>
59
+
57
60
  # Valid
58
61
  csv = HoneyCSV.new(csv_string, valid_columns: [:id, :username])
59
62
  csv.rows.first.username # => "buren"
@@ -66,26 +69,53 @@ csv = HoneyCSV.new(csv_string, header: ['Id', 'Username'])
66
69
  csv.rows.first.username # => "buren"
67
70
  ```
68
71
 
72
+ If your header contains special chars and/or chars that can't be part of Ruby method names,
73
+ things get a little awkward..
74
+ ```ruby
75
+ csv_string = "ÅÄÖ\nSwedish characters"
76
+ user = HoneyCSV.new(csv_string).rows.first
77
+ # Note that these chars aren't "downcased",
78
+ # "ÅÄÖ".downcase # => "ÅÄÖ"
79
+ user.ÅÄÖ # => "Swedish characters"
80
+
81
+ csv_string = "First-Name\nJacob"
82
+ user = HoneyCSV.new(csv_string).rows.first
83
+ user.first_name # => "Jacob" (hyphens in header names are converted to underscores)
84
+ ```
85
+
86
+ If you want to see more usage examples check out the `spec/` directory.
87
+
69
88
  ## Benchmark
70
89
 
71
90
  _Note_: This gem adds some overhead to parsing a CSV string. I've included some benchmarks below; your mileage may vary.
72
91
 
73
- Benchmarks for a 21MB file with 10 columns (MBP 2013 OSX 10.10).
92
+ Benchmarks, using the `benchmark-ips` gem, CSV with 11 columns in MBP 2013 OSX 10.10.
93
+
94
+ 124KB (~1000 lines)
74
95
 
75
96
  ```
76
97
  Calculating -------------------------------------
77
- stdlib CSV 1.000 i/100ms
78
- HoneyFormat::CSV 1.000 i/100ms
98
+ stdlib CSV 6.000 i/100ms
99
+ HoneyFormat::CSV 5.000 i/100ms
79
100
  -------------------------------------------------
80
- stdlib CSV 0.317 (± 0.0%) i/s - 4.000 in 12.636647s
81
- HoneyFormat::CSV 0.335 (± 0.0%) i/s - 4.000 in 12.061301s
101
+ stdlib CSV 64.236 (± 4.7%) i/s - 642.000
102
+ HoneyFormat::CSV 52.762 (± 5.7%) i/s - 530.000
103
+
104
+ Comparison:
105
+ stdlib CSV: 64.2 i/s
106
+ HoneyFormat::CSV: 52.8 i/s - 1.22x slower
107
+ ```
108
+
109
+ 20MB (~180k lines)
82
110
 
111
+ ```
83
112
  Comparison:
84
- HoneyFormat::CSV: 0.3 i/s
85
- stdlib CSV: 0.3 i/s - 1.06x slower
113
+ stdlib CSV: 0.3 i/s
114
+ HoneyFormat::CSV: 0.3 i/s - 1.26x slower
86
115
  ```
87
116
 
88
- Run the benchmark as a regular ruby file: `ruby benchmark.rb`.
117
+ See `bin/benchmark` for details.
118
+ Run benchmark: `bin/benchmark`.
89
119
 
90
120
  ## Development
91
121
 
data/bin/benchmark ADDED
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bundler/setup'
3
+ require 'honey_format'
4
+
5
+ require 'benchmark/ips'
6
+ require 'csv'
7
+
8
+ # Assumes that you have a file "benchmark-20mb.csv" in your current directory
9
+ csv = File.read('benchmark-20mb.csv')
10
+
11
+ Benchmark.ips do |x|
12
+ x.time = 10
13
+ x.warmup = 2
14
+
15
+ x.report('stdlib CSV') { CSV.parse(csv) }
16
+ x.report('HoneyFormat::CSV') { HoneyFormat::CSV.new(csv).rows }
17
+
18
+ x.compare!
19
+ end
data/honey_format.gemspec CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency 'bundler', '~> 1.10'
22
22
  spec.add_development_dependency 'rake', '~> 10.0'
23
+ spec.add_development_dependency 'benchmark-ips'
23
24
  spec.add_development_dependency 'rspec'
24
25
  spec.add_development_dependency 'simplecov'
25
26
  end
data/lib/honey_format.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  require 'honey_format/version'
2
+ require 'honey_format/exceptions'
2
3
  require 'honey_format/csv'
3
4
 
5
+ # Main module for HoneyFormat
4
6
  module HoneyFormat
7
+ # CSV alias
5
8
  HoneyCSV = CSV
6
9
  end
@@ -0,0 +1,56 @@
1
module HoneyFormat
  # Represents columns.
  #
  # Converts an array of header strings into an array of symbols that can be
  # used as Struct member names.
  class Columns
    # @return [Columns] a new instance of Columns.
    # @param [Array] header array of strings.
    # @param [Array, Symbol] valid array of symbols representing valid columns,
    #   or :all (the default) to accept every column.
    # @raise [MissingCSVHeaderColumnError] raised when a header column is nil or empty.
    # @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
    def initialize(header, valid = :all)
      @columns = build_columns(header, valid)
    end

    # Returns columns as array.
    # @return [Array] of columns.
    def to_a
      @columns
    end

    private

    # Sanitizes, validates and symbolizes every header column.
    def build_columns(header, valid)
      header.map do |column|
        Sanitize.string!(column)
        validate_column_presence!(column)

        column = symbolize_string(column)

        validate_column_name!(column, valid)
        column
      end
    end

    # Builds the column symbol: downcased, spaces removed, hyphens turned into
    # underscores. Works on copies so the header strings passed in (which the
    # caller keeps for display) are left untouched.
    def symbolize_string(column)
      column.downcase.delete(' ').tr('-', '_').to_sym
    end

    def validate_column_presence!(col)
      return unless col.nil? || col.empty?

      fail(MissingCSVHeaderColumnError, "CSV header column can't be empty.")
    end

    def validate_column_name!(column, valid)
      return if valid == :all
      return if valid.include?(column)

      fail(UnknownCSVHeaderColumnError, "column :#{column} not in #{valid.inspect}")
    end
  end
end
@@ -1,63 +1,44 @@
1
1
  require 'csv'
2
2
 
3
- module HoneyFormat
4
- class MissingCSVHeaderError < StandardError; end
5
- class CSVHeaderColumnError < StandardError; end
3
+ require 'honey_format/sanitize'
4
+ require 'honey_format/rows'
5
+ require 'honey_format/header'
6
6
 
7
+ module HoneyFormat
8
+ # Represents CSV.
7
9
  class CSV
8
- attr_reader :header, :columns
9
-
10
+ # @return [CSV] a new instance of CSV.
11
+ # @param [String] csv string.
12
+ # @param [Array] valid_columns valid array of symbols representing valid columns.
13
+ # @param [Array] header optional argument for CSV header
14
+ # @raise [MissingCSVHeaderError] raised when header is missing (empty or nil).
15
+ # @raise [MissingCSVHeaderColumnError] raised when header column is missing.
16
+ # @raise [UnknownCSVHeaderColumnError] raised when column is not in valid list.
10
17
  def initialize(csv, delimiter: ',', header: nil, valid_columns: :all)
11
18
  csv = ::CSV.parse(csv, col_sep: delimiter)
12
- @head = build_header(header || csv.shift)
13
19
  @csv_body = csv
14
- @columns = build_columns(@head, valid_columns)
15
- @struct_klass = Struct.new(*@columns)
20
+ @header = Header.new(header || csv.shift, valid: valid_columns)
16
21
  end
17
22
 
23
+ # @return [Array] of strings for sanitized header.
18
24
  def header
19
- @head
20
- end
21
-
22
- def rows
23
- @rows ||= @csv_body.map { |row| create_object(row) }
24
- end
25
-
26
- def row_count
27
- rows.length
25
+ @header.column_names
28
26
  end
29
27
 
30
- private
31
-
32
- def build_header(head)
33
- head || fail(MissingCSVHeaderError, 'CSV header must be present.')
34
- clean_row(head)
35
- end
36
-
37
- def build_columns(keys, valid_columns)
38
- columns = keys.map do |raw_col|
39
- col = raw_col.downcase.gsub(/ /, '').to_sym
40
- validate_column!(raw_col, col, valid_columns)
41
- col
42
- end
43
- end
44
-
45
- def create_object(row)
46
- @struct_klass.new(*clean_row(row))
47
- end
48
-
49
- def clean_row(row)
50
- row.map { |column| clean(column) }
28
+ # @return [Array] of column identifiers.
29
+ def columns
30
+ @header.columns
51
31
  end
52
32
 
53
- def clean(column)
54
- column.strip unless column.nil?
33
+ # @return [Array] of rows.
34
+ # @raise [InvalidRowLengthError] raised when a row has more elements than there are columns
35
+ def rows
36
+ @rows ||= Rows.new(@csv_body, columns).to_a
55
37
  end
56
38
 
57
- def validate_column!(c, col, valid_columns)
58
- unless valid_columns == :all
59
- valid_columns.include?(col) || fail(CSVHeaderColumnError, "column :#{col} (\"#{c}\") not in #{valid_columns.inspect}")
60
- end
39
+ # @yield [row] block to receive the row.
40
+ def each_row
41
+ rows.each { |row| yield(row) }
61
42
  end
62
43
  end
63
44
  end
@@ -0,0 +1,12 @@
1
+ module HoneyFormat
2
+ # Raised when header is missing
3
+ class MissingCSVHeaderError < StandardError; end
4
+ # Raised when header column is missing
5
+ class MissingCSVHeaderColumnError < StandardError; end
6
+ # Raised when a column is not in passed valid columns
7
+ class UnknownCSVHeaderColumnError < StandardError; end
8
+ # Raised when columns are empty
9
+ class EmptyColumnsError < ArgumentError; end
10
+ # Raised when a row has more elements than there are columns
11
+ class InvalidRowLengthError < ArgumentError; end
12
+ end
@@ -0,0 +1,32 @@
1
+ require 'honey_format/columns'
2
+
3
+ module HoneyFormat
4
+ # Represents a header
5
+ class Header
6
+ attr_reader :column_names
7
+
8
+ # @return [Header] a new instance of Header.
9
+ # @param [Array] header array of strings.
10
+ # @param [Array] valid array of symbols representing valid columns.
11
+ # @raise [MissingCSVHeaderError] raised when header is missing (empty or nil).
12
+ def initialize(header, valid: :all)
13
+ @column_names = build_header(header)
14
+ @columns = Columns.new(@column_names, valid)
15
+ end
16
+
17
+ # Returns columns as array.
18
+ # @return [Array] of columns.
19
+ def columns
20
+ @columns.to_a
21
+ end
22
+
23
+ private
24
+
25
+ def build_header(header)
26
+ if header.nil? || header.empty?
27
+ fail(MissingCSVHeaderError, "CSV header can't be empty.")
28
+ end
29
+ Sanitize.array!(header)
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,47 @@
1
module HoneyFormat
  # Holds data for a single row.
  #
  # Wraps a Struct class generated from the column names and builds one
  # Struct instance per row array.
  class Row
    # Returns a new instance of Row.
    # @return [Row] a new instance of Row.
    # @param [Array] columns an array of symbols.
    # @raise [EmptyColumnsError] raised when there are no columns.
    # @example Create new row
    #     Row.new([:id])
    def initialize(columns)
      validate_columns!(columns)
      @klass = Struct.new(*columns)
      @columns = columns
    end

    # Returns a Struct.
    # @return [Struct] a new Struct instance holding the row values.
    # @param row [Array] the row array.
    # @raise [InvalidRowLengthError] raised when the row has more elements than there are columns.
    # @example Build new row
    #     r = Row.new([:id])
    #     r.build(['1']).id #=> '1'
    def build(row)
      @klass.new(*row)
    rescue ArgumentError
      # Struct raises ArgumentError ("struct size differs") when given more
      # values than members. Only exception classes may be listed in a rescue
      # clause; a String there turns unrelated errors into a TypeError.
      fail_for_struct_size_diff!(row)
    end

    private

    def validate_columns!(columns)
      return unless columns.empty?

      err_msg = 'Expected array with at least one element, but was empty.'
      fail(EmptyColumnsError, err_msg)
    end

    def fail_for_struct_size_diff!(row)
      err_msg = [
        "Row length #{row.length}",
        "for columns #{@columns.length}",
        "row: #{row.inspect}"
      ].join(', ')
      fail(InvalidRowLengthError, err_msg)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'honey_format/row'
2
+
3
module HoneyFormat
  # Represents rows.
  #
  # Turns the parsed CSV body (an array of arrays) into an array of row
  # objects built by Row.
  class Rows
    # Returns a new instance of Rows.
    # @return [Rows] new instance of Rows.
    # @param [Array] rows the array of rows.
    # @param [Array] columns the array of column symbols.
    # @raise [EmptyColumnsError] raised by Row when there are no columns.
    # @raise [InvalidRowLengthError] raised by Row when a row has more elements than there are columns.
    def initialize(rows, columns)
      @rows = prepare_rows(Row.new(columns), rows)
    end

    # Returns rows as array.
    # @return [Array] of rows.
    def to_a
      @rows
    end

    private

    # Sanitizes and builds every non-blank row.
    def prepare_rows(builder, rows)
      rows.each_with_object([]) do |row, built_rows|
        next if blank_row?(row)

        built_rows << builder.build(Sanitize.array!(row))
      end
    end

    # The Ruby CSV library returns empty lines as [] or [nil]; ignore them.
    # Only rows without any value are treated as blank, so a data row whose
    # first cell happens to be empty is still kept.
    def blank_row?(row)
      row.compact.empty?
    end
  end
end
@@ -0,0 +1,26 @@
1
+ module HoneyFormat
2
+ # Utility class for sanitizing various simple data types.
3
+ class Sanitize
4
+ # Returns array of cleaned strings.
5
+ # @return [Array] the cleaned array of strings.
6
+ # @param [Array] row the array of strings to be cleaned.
7
+ # @example Sanitize array
8
+ # Sanitize.array!([" a "]) #=> ["a"]
9
+ def self.array!(row)
10
+ row.map! { |column| string!(column) }
11
+ row
12
+ end
13
+
14
+ # Returns the cleaned string.
15
+ # @return [String] the cleaned string.
16
+ # @param [String] column the string to be cleaned.
17
+ # @example Sanitize string
18
+ # Sanitize.string!(" a ") #=> "a"
19
+ # @example Sanitize nil
20
+ # Sanitize.string!(nil) #=> nil
21
+ def self.string!(column)
22
+ column.strip! unless column.nil?
23
+ column
24
+ end
25
+ end
26
+ end
@@ -1,3 +1,4 @@
1
1
  module HoneyFormat
2
- VERSION = '0.1.1'
2
+ # Gem version
3
+ VERSION = '0.2.0'
3
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: honey_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-02 00:00:00.000000000 Z
11
+ date: 2015-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: benchmark-ips
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -77,17 +91,24 @@ files:
77
91
  - ".gitignore"
78
92
  - ".rspec"
79
93
  - ".travis.yml"
94
+ - CHANGELOG.md
80
95
  - CODE_OF_CONDUCT.md
81
96
  - Gemfile
82
97
  - LICENSE.txt
83
98
  - README.md
84
99
  - Rakefile
85
- - benchmark.rb
100
+ - bin/benchmark
86
101
  - bin/console
87
102
  - bin/setup
88
103
  - honey_format.gemspec
89
104
  - lib/honey_format.rb
105
+ - lib/honey_format/columns.rb
90
106
  - lib/honey_format/csv.rb
107
+ - lib/honey_format/exceptions.rb
108
+ - lib/honey_format/header.rb
109
+ - lib/honey_format/row.rb
110
+ - lib/honey_format/rows.rb
111
+ - lib/honey_format/sanitize.rb
91
112
  - lib/honey_format/version.rb
92
113
  homepage: https://github.com/buren/honey_format
93
114
  licenses:
data/benchmark.rb DELETED
@@ -1,17 +0,0 @@
1
- require 'benchmark/ips'
2
- require 'csv'
3
-
4
- require 'bundler/setup'
5
- require 'honey_format'
6
-
7
- csv = File.read('benchmark.csv')
8
-
9
- Benchmark.ips do |x|
10
- x.time = 10
11
- x.warmup = 2
12
-
13
- x.report("stdlib CSV") { CSV.parse(csv) }
14
- x.report("HoneyFormat::CSV") { HoneyFormat::CSV.new(csv) }
15
-
16
- x.compare!
17
- end