uncsv 0.3.1
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.rubocop.yml +21 -0
- data/.travis.yml +37 -0
- data/.yardopts +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +201 -0
- data/README.md +159 -0
- data/Rakefile +13 -0
- data/bin/check-version +6 -0
- data/bin/console +15 -0
- data/bin/rake +18 -0
- data/bin/rspec +18 -0
- data/bin/rubocop +18 -0
- data/bin/yard +18 -0
- data/bin/yardoc +18 -0
- data/bin/yri +18 -0
- data/lib/uncsv/config.rb +204 -0
- data/lib/uncsv/header.rb +173 -0
- data/lib/uncsv/key_normalizer.rb +61 -0
- data/lib/uncsv/row.rb +109 -0
- data/lib/uncsv/rows.rb +99 -0
- data/lib/uncsv/version.rb +6 -0
- data/lib/uncsv.rb +71 -0
- data/uncsv.gemspec +30 -0
- metadata +181 -0
data/lib/uncsv/config.rb
ADDED
@@ -0,0 +1,204 @@
# frozen_string_literal: true

class Uncsv
  # Configuration options for parsing CSVs. It is a struct-like object with
  # attribute accessors.
  class Config
    # Options that directly map to Std-lib `CSV` options
    CSV_OPTS = %i[
      col_sep row_sep quote_char field_size_limit
    ].freeze

    # The default values applied if an attribute's value is not specified when
    # constructing a new `Config` object.
    DEFAULTS = {
      col_sep: ',',
      expand_headers: false,
      field_size_limit: nil,
      header_rows: [],
      header_separator: '.',
      nil_empty: true,
      normalize_headers: false,
      quote_char: '"',
      row_sep: :auto,
      skip_rows: [],
      skip_blanks: false,
      unique_headers: false
    }.freeze

    # The string that separates each field
    #
    # Default: `","`.
    #
    # @return [String] The column separator string
    # @see (see #initialize)
    attr_accessor :col_sep

    # @!attribute expand_headers
    # Whether to fill empty headers with values from the left.
    #
    # Default `false`. If set to `true`, blank header row cells will assume
    # the header of the row to their left. This is useful for hierarchical
    # headers where not all the header cells are filled in. If set to an
    # array of header indexes, only the specified headers will be expanded.
    #
    # @return [Array] An array of expanded header indexes

    # The maximum size CSV will read ahead looking for a closing quote.
    #
    # Default: `nil`.
    #
    # @return [nil, Integer] The maximum field size
    # @see (see #initialize)
    attr_accessor :field_size_limit

    # Indexes of the rows to use as headers
    #
    # Default: `[]`. Accepts an array of zero-based indexes or a single index.
    # For example, it could be set to `0` to indicate a header in the first
    # row. If set to an array of indexes (`[1,2]`), the header row text will
    # be joined by the `:header_separator`. For example, if the cell (0,0)
    # had the value `"Personal"` and cell (1,0) had the value `"Name"`, the
    # header would become `"Personal.Name"`. Any data above the last header
    # row will be ignored.
    #
    # @return [Array] The header row indexes
    attr_reader :header_rows

    # The separator between multiple header fields
    #
    # Default: `"."`. When using multiple header rows, this is a string used
    # to separate the individual header fields.
    #
    # @return [String] The separator string
    attr_accessor :header_separator

    # Whether to represent empty cells as `nil`.
    #
    # Default `true`. If `true`, empty cells will be set to `nil`, otherwise
    # they are set to an empty string.
    #
    # @return [Boolean] Whether empty cells will be `nil`ed
    attr_accessor :nil_empty

    # Whether to rewrite headers to a standard format
    #
    # Default `false`. If set to `true`, header field text will be normalized.
    # The text will be lowercased, and non-alphanumeric characters will be
    # replaced with underscores (`_`).
    #
    # If set to a string, those characters will be replaced with the string
    # instead.
    #
    # If set to a hash, the hash will be treated as options to KeyNormalizer,
    # accepting the `:separator` and `:downcase` options.
    #
    # If set to another object, it is expected to respond to the
    # `normalize(key)` method by returning a normalized string.
    #
    # @see KeyNormalizer
    # @return [KeyNormalizer, Object] The KeyNormalizer object or equivalent
    attr_reader :normalize_headers

    # The character used to quote individual fields
    #
    # Default `'"'`. Fields containing the column or row separator, or the
    # quote character itself, must be wrapped in this character to be read as
    # a single field.
    #
    # @return [String] The quote character
    # @see (see #initialize)
    attr_accessor :quote_char

    # The string at the end of each row
    #
    # Default `:auto`.
    #
    # @return [:auto, String] The row separator
    # @see (see #initialize)
    attr_accessor :row_sep

    # Whether to skip blank rows
    #
    # Default `false`. If `true`, rows whose fields are all empty will be
    # skipped.
    #
    # @return [Boolean] Whether blank rows will be skipped
    attr_accessor :skip_blanks

    # An array of row indexes to skip
    #
    # Default `[]`. If set to an array of zero-based row indexes, those rows
    # will be skipped. This option does not apply to header rows.
    #
    # @return [Array] The row indexes to skip
    attr_reader :skip_rows

    # Whether to force headers to be unique
    #
    # Default `false`. If set to `true`, headers will be forced to be unique
    # by appending numbers to duplicates. For example, if two header cells
    # have the text `"Name"`, the headers will become `"Name.0"` and
    # `"Name.1"`. The separator between the text and the number can be set
    # using the `:header_separator` option.
    #
    # @return [Boolean] Whether headers will be uniqued
    attr_accessor :unique_headers

    # Create a new `Config` object.
    #
    # Options will be set to the defaults unless overridden by the `opts`
    # parameter.
    #
    # @param opts [Hash] A hash of configuration options. See the individual
    #   attributes for detailed descriptions.
    #
    # @see http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html#method-c-new
    #   CSV#new
    def initialize(opts = {})
      DEFAULTS.merge(opts).each { |k, v| public_send("#{k}=", v) }
    end

    def skip_rows=(rows)
      rows = [rows] unless rows.is_a?(Array)
      @skip_rows = Hash[rows.map { |r| [r, true] }]
    end

    def header_rows=(rows)
      rows = [rows] unless rows.is_a?(Array)
      @header_rows = rows.sort
    end

    def expand_headers=(value)
      value = [value] if value.is_a?(Integer)
      @expand_headers = value
    end

    def normalize_headers=(value)
      if value.is_a?(Hash)
        value = KeyNormalizer.new(value)
      elsif value.is_a?(String)
        value = KeyNormalizer.new(separator: value)
      elsif value == true
        value = KeyNormalizer.new
      end
      @normalize_headers = value
    end

    def expand_headers
      return header_rows if @expand_headers == true
      return [] if @expand_headers == false

      @expand_headers
    end

    # Get options passed through to `CSV#new`.
    #
    # @return [Hash] A hash of the CSV options
    # @see (see #initialize)
    def csv_opts
      Hash[CSV_OPTS.map { |k| [k, public_send(k)] }]
    end
  end
end
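For orientation, a minimal usage sketch (not taken from the gem's own docs) of the `Config` class above. It shows the writer coercions (a single index becomes an array, skipped rows become a lookup hash, a string becomes a `KeyNormalizer` separator) and the `csv_opts` pass-through, assuming the gem is loaded via `require 'uncsv'`:

require 'uncsv'

config = Uncsv::Config.new(header_rows: 0, skip_blanks: true)
config.header_rows        # => [0]  (single index coerced to an array)
config.skip_rows = 3
config.skip_rows          # => {3=>true}  (stored as a lookup hash)
config.normalize_headers = '-'
config.normalize_headers  # => a KeyNormalizer using "-" as its separator
config.csv_opts           # => {:col_sep=>",", :row_sep=>:auto, :quote_char=>"\"", :field_size_limit=>nil}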
data/lib/uncsv/header.rb
ADDED
@@ -0,0 +1,173 @@
# frozen_string_literal: true

class Uncsv
  # A parsed CSV header.
  class Header
    # Create a new `Header` object
    #
    # @param headers [Array<Array<String>>] An array of header row values
    # @param config [Config] Configuration options. Default options if `nil`.
    def initialize(headers, config = nil)
      @headers = headers
      @config = config || Config.new
      @to_a = nil
    end

    # Iterate over each header field
    #
    # @yield A block to run for each header field
    # @yieldparam row [String] A header field
    # @return [Enumerator] An enumerator over header fields
    def each(&block)
      to_a.each(&block)
    end

    # Get an array of parsed header fields
    #
    # The header fields are cached, so consecutive calls to this method
    # return the same array.
    #
    # @return [Array] The array of header fields
    def to_a
      @to_a ||= begin
        headers = nil_empty(@headers)
        headers = square(headers)
        headers = normalize(headers) if @config.normalize_headers
        headers = expand(headers)
        combined = combine(headers)
        combined = unique(combined) if @config.unique_headers
        combined
      end
    end

    class << self
      # Parse headers from a CSV
      #
      # @param csv [CSV] A
      #   {http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html CSV} object.
      # @param config [Config] Configuration options. Default options if `nil`.
      # @return [OpenStruct] An object with the methods `header`, `index`, and
      #   `rows`. `header` is the {Header} object. `index` is the next CSV row
      #   index. `rows` is an array of the skipped rows including the header
      #   rows.
      def parse!(csv, config)
        index = config.header_rows.empty? ? 0 : (config.header_rows.max + 1)
        rows = read_rows(csv, index)
        headers = config.header_rows.map { |i| rows[i] }
        OpenStruct.new(
          header: new(headers, config),
          index: index,
          rows: rows
        )
      end

      private

      # Read a given number of rows from a CSV
      #
      # @param csv [CSV] A
      #   {http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html CSV} object to
      #   read rows from.
      # @param count [Integer] The number of rows to read
      # @return [Array<Array<String>>] An array of the read rows
      def read_rows(csv, count)
        (0...count).map { csv.shift }
      end
    end

    private

    # Combine multiple headers into a single header
    #
    # Joins individual headers with the `header_separator`.
    #
    # @param headers [Array<Array<String>>] The headers to combine
    # @return [Array<String>] The combined header
    def combine(headers)
      headers.each_with_object([]) do |header, combined|
        header.each_with_index do |key, index|
          parts = [combined[index], key].compact
          combined[index] = if parts.empty?
                              nil
                            else
                              parts.join(@config.header_separator)
                            end
        end
      end
    end

    # Fills in `nil` headers from the left
    #
    # @param headers [Array<Array<String>>] The headers to expand
    # @return [Array<Array<String>>] The expanded headers
    def expand(headers)
      headers.each_with_index.map do |header, index|
        next header unless @config.expand_headers.include?(index)

        last = nil
        header.map do |key|
          key ? last = key : last
        end
      end
    end

    # Unique headers by adding numbers to the end
    #
    # @param combined [Array<String>] The combined headers to unique
    # @return [Array<String>] The uniqued headers
    def unique(combined)
      combined = combined.dup
      collate(combined).each do |key, indexes|
        next if indexes.size == 1

        indexes.each_with_index do |index, count|
          combined[index] = [key, count].compact.join(@config.header_separator)
        end
      end
      combined
    end

    # Create a hash of headers to arrays of their indexes
    #
    # Used for checking for header uniqueness
    #
    # @param header [Array<String>] The combined header to collate
    # @return [Hash] The collated headers
    def collate(header)
      collated = {}
      header.each_with_index do |key, index|
        collated[key] = (collated[key] || []) << index
      end
      collated
    end

    # Normalize header values
    #
    # @param headers [Array<Array<String>>] The array of uncombined headers to
    #   normalize
    def normalize(headers)
      headers.map do |header|
        header.map do |key|
          @config.normalize_headers.normalize(key)
        end
      end
    end

    # Make the headers all the same length
    #
    # @param headers [Array<Array<String>>] An array of headers to square
    # @return [Array<Array<String>>] The squared headers
    def square(headers)
      length = headers.map(&:size).max
      headers.map { |h| h.fill(nil, h.size, length - h.size) }
    end

    # Convert header empty strings to nil
    #
    # @param headers [Array<Array<String>>] An array of headers to convert
    # @return [Array<Array<String>>] The converted headers
    def nil_empty(headers)
      headers.map { |h| h.map { |k| k == '' ? nil : k } }
    end
  end
end
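To illustrate the `to_a` pipeline above (empty cells nil-ed, rows squared, optionally normalized, then expanded, combined, and uniqued), here is a small example sketch of what a two-row hierarchical header would produce; the sample strings are illustrative only:

config = Uncsv::Config.new(
  header_rows: [0, 1], expand_headers: true, unique_headers: true
)
header = Uncsv::Header.new(
  [['Personal', '', ''], ['Name', 'Age', 'Age']],
  config
)
header.to_a  # => ["Personal.Name", "Personal.Age.0", "Personal.Age.1"]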
data/lib/uncsv/key_normalizer.rb
ADDED
@@ -0,0 +1,61 @@
# frozen_string_literal: true

class Uncsv
  # Normalizes strings into a consistent format
  class KeyNormalizer
    # The default values applied if an attribute's value is not specified when
    # constructing a new `KeyNormalizer` object.
    DEFAULTS = {
      downcase: true,
      separator: '_'
    }.freeze

    # A string to replace all non-alphanumeric characters in the key
    #
    # Default: '_'. Can be set to an empty string to remove non-alphanumeric
    # characters without replacing them.
    #
    # @return [String] The separator string
    attr_accessor :separator

    # Sets keys to all lower-case if set to `true`
    #
    # Default: true
    #
    # @return [Boolean] Whether the key will be lower-cased
    attr_accessor :downcase

    # Create a new `KeyNormalizer` object.
    #
    # Options will be set to the defaults unless overridden by the `opts`
    # parameter.
    #
    # @param opts [Hash] A hash of configuration options. See the individual
    #   attributes for detailed descriptions.
    def initialize(opts = {})
      DEFAULTS.merge(opts).each { |k, v| public_send("#{k}=", v) }
    end

    # Normalize a key
    #
    # Replaces non-alphanumeric characters with `separator`, then
    # deduplicates separators and trims them from the ends of the key. Then
    # the key is lower-cased if `downcase` is set.
    #
    # @param key [String, nil] The key field to normalize
    # @return [String, nil] The normalized header field or `nil` if the input
    #   key is `nil`.
    def normalize(key)
      return nil if key.nil?

      key = key.gsub(/[^a-z0-9]+/i, separator)
      unless separator.empty?
        escaped_separator = Regexp.escape(separator)
        key.gsub!(/#{escaped_separator}{2,}/, separator)
        key.gsub!(/^#{escaped_separator}|#{escaped_separator}$/, '')
      end
      key.downcase! if downcase
      key
    end
  end
end
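A short sketch of `normalize` in action, once with the defaults and once with a custom separator and `downcase` disabled; the input strings are arbitrary examples:

normalizer = Uncsv::KeyNormalizer.new
normalizer.normalize('First Name (Last)')  # => "first_name_last"

dashed = Uncsv::KeyNormalizer.new(separator: '-', downcase: false)
dashed.normalize('Unit Price ($)')         # => "Unit-Price"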
data/lib/uncsv/row.rb
ADDED
@@ -0,0 +1,109 @@
# frozen_string_literal: true

class Uncsv
  # A single data row from a CSV. Fields can be accessed by header or
  # zero-based index.
  class Row
    include Enumerable

    # The headers for each field
    #
    # If a header for a given field is not defined, it will be `nil`.
    #
    # @return [Array] An array of the field headers
    attr_reader :header

    # The fields ordered from left to right
    #
    # An array of zero-indexed field values. If a field is empty it will be
    # `nil`, or `''` if `nil_empty` is `false`.
    #
    # @return [Array] An array of the field values
    attr_reader :fields

    # Create a new `Row` object
    #
    # The `header` and `fields` arrays do not need to be the same length. If
    # they are not, the missing values will be filled with `nil`.
    #
    # @param header [Array] The field headers
    # @param fields [Array] The field values
    # @param config [Config] Configuration options. Default options if `nil`.
    def initialize(header, fields, config = nil)
      @config = config || Config.new
      @header = square(header, fields.size)
      @fields = square(fields, header.size).map { |f| process(f) }
      @map = Hash[header.zip(@fields)]
    end

    # Get a field by index or header
    #
    # If `key` is an `Integer`, get a field by a zero-based index. If `key` is
    # a header, access a field by its header. If `key` is nil, or if a field
    # does not exist, will return `nil`.
    #
    # @param key [Integer, String] The index or header
    # @return [String, nil] The field value if it exists
    def [](key)
      return if key.nil?

      value = key.is_a?(Integer) ? @fields[key] : @map[key]
      process(value)
    end

    # Gets a hash of headers to fields
    #
    # `nil` headers will not be included in the hash.
    #
    # @return [Hash] A hash of headers to fields
    def to_h
      Hash[@header.compact.map { |h| [h, self[h]] }]
    end

    # Iterate over each pair of headers and fields
    #
    # @yield A block to run for each pair
    # @yieldparam row [Row] A row object
    # @return [Enumerator] An enumerator over each pair
    def each(&block)
      @map.each_pair(&block)
    end

    # Get a field by index or header and specify a default
    #
    # Tries to get the field specified by key (see {#[]}). If the field
    # is `nil`, returns the default. If a block is given, the default is the
    # block's return value, otherwise the default is the `default` argument.
    #
    # @yield A block to run if the field is `nil`
    # @yieldparam key [String] The `key` parameter
    # @return [String, Object] The field value or default
    def fetch(key, default = nil)
      value = self[key]
      return value unless value.nil?

      block_given? ? yield(key) : default
    end

    private

    # Fills an array with nil to extend it to the given size
    #
    # @param array [Array] The array to square
    # @param size [Integer] The target array size
    # @return [Array] The squared array
    def square(array, size)
      array.fill(nil, array.size, size - array.size)
    end

    # Transforms a field value according to the config options
    #
    # @param field [String] The field value to process
    # @return [String] The processed field
    def process(field)
      field = '' if field.nil? && !@config.nil_empty
      field = nil if field == '' && @config.nil_empty
      field
    end
  end
end
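A brief sketch of how `Row` behaves with the default configuration (`nil_empty: true`); the header names and values are illustrative:

row = Uncsv::Row.new(%w[name age], ['Alice', ''])
row['name']              # => "Alice"
row['age']               # => nil  (empty string nil-ed by default)
row[0]                   # => "Alice"
row.fetch('age', 'n/a')  # => "n/a"
row.to_h                 # => {"name"=>"Alice", "age"=>nil}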
data/lib/uncsv/rows.rb
ADDED
@@ -0,0 +1,99 @@
# frozen_string_literal: true

require 'csv'

class Uncsv
  # A collection of parsed rows from a CSV
  class Rows
    # Create a new `Rows` object
    #
    # @param csv [CSV] A
    #   {http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html CSV} object.
    # @param config [Config] Configuration options. Default options if `nil`.
    def initialize(csv, config = nil)
      @csv = csv
      @config = config || Config.new
      @started = false
      @parsed = nil
    end

    # Iterate over each row
    #
    # @yield A block to run for each row
    # @yieldparam row [Row] A row object
    # @return [Enumerator] An enumerator over each row
    def each(&block)
      Enumerator.new do |yielder|
        start
        index = parsed.index
        loop do
          break unless yield_row(yielder, index)

          index += 1
        end
      end.each(&block)
    end

    # Get the CSV header
    #
    # @return [Array] An array of the CSV header fields
    # @see Header#to_a
    def header
      parsed.header.to_a
    end

    private

    # Whether the given row should be skipped
    #
    # @param fields [Array] An array of field values
    # @param index [Integer] The zero-based row index
    # @return [Boolean] Whether the row should be skipped
    def should_skip?(fields, index)
      return true if @config.skip_rows[index]
      return true if @config.skip_blanks && fields.compact.empty?

      false
    end

    # Yield a row from the CSV to the Enumerator yielder
    #
    # Reads a row from the CSV and yields a parsed row if necessary.
    #
    # @param yielder [Enumerator::Yielder] A yielder to yield the row to
    # @param index [Integer] The next row index
    # @return [Boolean] `false` if the CSV is ended
    def yield_row(yielder, index)
      fields = @csv.shift
      return false unless fields

      unless should_skip?(fields, index)
        yielder << Row.new(header, fields, @config)
      end
      true
    end

    # Start reading the CSV
    #
    # If the CSV has already been read, it will be rewound and the header will
    # be reset.
    def start
      if @started
        @parsed = nil
        @csv.rewind
      else
        @started = true
      end
    end

    # Get the header parse object
    #
    # The parsed header is cached, so multiple calls will return the same
    # instance.
    #
    # @return [OpenStruct] The parsed header object
    def parsed
      @parsed ||= Header.parse!(@csv, @config)
    end
  end
end
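Tying the pieces together, a hedged end-to-end sketch: `Rows` wraps a standard-library `CSV`, parses the header rows via `Header.parse!`, and then streams `Row` objects while honoring `skip_blanks` and `skip_rows`. The CSV text and field names are example data only:

require 'csv'
require 'uncsv'

csv = CSV.new("name,age\nAlice,30\n\nBob,25\n")
config = Uncsv::Config.new(header_rows: 0, skip_blanks: true)
rows = Uncsv::Rows.new(csv, config)

rows.header                            # => ["name", "age"]
names = []
rows.each { |row| names << row['name'] }
names                                  # => ["Alice", "Bob"]  (blank line skipped)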