uncsv 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.rubocop.yml +21 -0
- data/.travis.yml +37 -0
- data/.yardopts +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +201 -0
- data/README.md +159 -0
- data/Rakefile +13 -0
- data/bin/check-version +6 -0
- data/bin/console +15 -0
- data/bin/rake +18 -0
- data/bin/rspec +18 -0
- data/bin/rubocop +18 -0
- data/bin/yard +18 -0
- data/bin/yardoc +18 -0
- data/bin/yri +18 -0
- data/lib/uncsv/config.rb +204 -0
- data/lib/uncsv/header.rb +173 -0
- data/lib/uncsv/key_normalizer.rb +61 -0
- data/lib/uncsv/row.rb +109 -0
- data/lib/uncsv/rows.rb +99 -0
- data/lib/uncsv/version.rb +6 -0
- data/lib/uncsv.rb +71 -0
- data/uncsv.gemspec +30 -0
- metadata +181 -0
data/lib/uncsv/config.rb
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Uncsv
  # Configuration options for parsing CSVs. It is a struct-like object with
  # attribute accessors.
  class Config
    # Options that directly map to Std-lib `CSV` options
    CSV_OPTS = %i[
      col_sep row_sep quote_char field_size_limit
    ].freeze

    # The default values applied if an attribute's value is not specified when
    # constructing a new `Config` object.
    DEFAULTS = {
      col_sep: ',',
      expand_headers: false,
      field_size_limit: nil,
      header_rows: [],
      header_separator: '.',
      nil_empty: true,
      normalize_headers: false,
      quote_char: '"',
      row_sep: :auto,
      skip_rows: [],
      skip_blanks: false,
      unique_headers: false
    }.freeze

    # The string that separates each field
    #
    # Default: `","`.
    #
    # @return [String] The column separator string
    # @see (see #initialize)
    attr_accessor :col_sep

    # @!attribute expand_headers
    #   Whether to fill empty headers with values from the left.
    #
    #   Default `false`. If set to `true`, blank header row cells will assume
    #   the header of the cell to their left. This is useful for hierarchical
    #   headers where not all the header cells are filled in. If set to an
    #   array of header indexes, only the specified headers will be expanded.
    #   A header index is the zero-based position of a header row within the
    #   list of header rows (`0` is the first header row), which is how
    #   `Header` matches them.
    #
    #   @return [Array<Integer>] An array of expanded header indexes

    # The maximum size CSV will read ahead looking for a closing quote.
    #
    # Default: `nil`.
    #
    # @return [nil, Integer] The maximum field size
    # @see (see #initialize)
    attr_accessor :field_size_limit

    # Indexes of the rows to use as headers
    #
    # Default: `[]`. Accepts an array of zero-based indexes or a single index.
    # For example, it could be set to `0` to indicate a header in the first row.
    # If set to an array of indexes (`[0,1]`), the header row text will be
    # joined by the `:header_separator`. For example, if the cell (0,0) had
    # the value `"Personal"` and cell (1,0) had the value "Name", the header
    # would become `"Personal.Name"`. Any data above the last header row will be
    # ignored.
    #
    # @return [Array] The header row indexes, sorted ascending
    attr_reader :header_rows

    # The separator between multiple header fields
    #
    # Default: `"."`. When using multiple header rows, this is a string used
    # to separate the individual header fields.
    #
    # @return [String] The separator string
    attr_accessor :header_separator

    # Whether to represent empty cells as `nil`.
    #
    # Default `true`. If `true`, empty cells will be set to `nil`, otherwise,
    # they are set to an empty string.
    #
    # @return [Boolean] Whether empty cells will be `nil`ed
    attr_accessor :nil_empty

    # Whether to rewrite headers to a standard format
    #
    # Default `false`. If set to `true`, header field text will be normalized.
    # The text will be lowercased, and non-alphanumeric characters will be
    # replaced with underscores (`_`).
    #
    # If set to a string, those characters will
    # be replaced with the string instead.
    #
    # If set to a hash, the hash will be treated as options to KeyNormalizer,
    # accepting the `:separator`, and `:downcase` options.
    #
    # If set to another object, it is expected to respond to the
    # `normalize(key)` method by returning a normalized string.
    #
    # @see KeyNormalizer
    # @return [KeyNormalizer, Object] The KeyNormalizer object or equivalent
    attr_reader :normalize_headers

    # The character used to quote individual fields
    #
    # Default `'"'`.
    #
    # @return [String] The quote character
    # @see (see #initialize)
    attr_accessor :quote_char

    # The string at the end of each row
    #
    # Default `:auto`.
    #
    # @return [:auto, String] The row separator
    # @see (see #initialize)
    attr_accessor :row_sep

    # Whether to skip blank rows
    #
    # Default `false`. If `true`, rows whose fields are all empty will be
    # skipped.
    #
    # @return [Boolean] Whether blank rows will be skipped
    attr_accessor :skip_blanks

    # The row indexes to skip
    #
    # Default `[]`. If set to an array of zero-based row indexes (or a single
    # index), those rows will be skipped. This option does not apply to header
    # rows. The indexes are stored as a lookup table so that membership can be
    # tested with `skip_rows[index]`.
    #
    # @return [Hash] A hash mapping each row index to skip to `true`
    attr_reader :skip_rows

    # Whether to force headers to be unique
    #
    # Default `false`. If set to `true`, headers will be forced to be unique by
    # appending numbers to duplicates. For example, if two header cells have the
    # text `"Name"`, the headers will become `"Name.0"`, and `"Name.1"`. The
    # separator between the text and the number can be set using the
    # `:header_separator` option.
    #
    # @return [Boolean] Whether headers will be uniqued
    attr_accessor :unique_headers

    # Create a new `Config` object.
    #
    # Options will be set to the defaults unless overridden by the `opts`
    # parameter. Every option is assigned through its attribute writer, so
    # the coercions performed by the custom writers below always apply.
    #
    # @param opts [Hash] A hash of configuration options. See the individual
    #   attributes for detailed descriptions.
    # @raise [NoMethodError] If `opts` contains a key with no matching writer
    #
    # @see http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html#method-c-new
    #   CSV#new
    def initialize(opts = {})
      DEFAULTS.merge(opts).each { |k, v| public_send("#{k}=", v) }
    end

    # Set the row indexes to skip.
    #
    # @param rows [Array<Integer>, Integer] One index or an array of indexes
    def skip_rows=(rows)
      rows = [rows] unless rows.is_a?(Array)
      @skip_rows = Hash[rows.map { |r| [r, true] }]
    end

    # Set the header row indexes. The indexes are stored sorted.
    #
    # @param rows [Array<Integer>, Integer] One index or an array of indexes
    def header_rows=(rows)
      rows = [rows] unless rows.is_a?(Array)
      @header_rows = rows.sort
    end

    # Set which headers are expanded.
    #
    # @param value [Boolean, Integer, Array<Integer>] `true` for all header
    #   rows, `false` for none, or one or more header indexes
    def expand_headers=(value)
      value = [value] if value.is_a?(Integer)
      @expand_headers = value
    end

    # Set the header normalizer, building a `KeyNormalizer` when given a
    # hash of options, a separator string, or `true`. Any other value
    # (including `false`) is stored unchanged.
    #
    # @param value [Boolean, String, Hash, Object] The normalizer setting
    def normalize_headers=(value)
      @normalize_headers =
        case value
        when Hash then KeyNormalizer.new(value)
        when String then KeyNormalizer.new(separator: value)
        when true then KeyNormalizer.new
        else value
        end
    end

    # Get the indexes of the headers to expand.
    #
    # `Header` compares these against the position of each header row within
    # the list of header rows, so `true` resolves to every position
    # (`0...header_rows.size`) rather than to the CSV row numbers stored in
    # `header_rows`; the two only coincide when the header rows start at row
    # zero and are contiguous.
    #
    # @return [Array<Integer>] The header indexes to expand
    def expand_headers
      case @expand_headers
      when true then (0...header_rows.size).to_a
      when false, nil then []
      else @expand_headers
      end
    end

    # Get options passed through to `CSV#new`.
    #
    # @return [Hash] A hash of the CSV options
    # @see (see #initialize)
    def csv_opts
      Hash[CSV_OPTS.map { |k| [k, public_send(k)] }]
    end
  end
end
|
data/lib/uncsv/header.rb
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Uncsv
  # A parsed CSV header.
  class Header
    # Create a new `Header` object
    #
    # @param headers [Array<Array<String>>] An array of header row values
    # @param config [Config] Configuration options. Default options if `nil`.
    def initialize(headers, config = nil)
      @headers = headers
      @config = config || Config.new
      @to_a = nil
    end

    # Iterate over each header field
    #
    # @yield A block to run for each header field
    # @yieldparam field [String, nil] A header field
    # @return [Enumerator, Array] An enumerator over the header fields if no
    #   block is given, otherwise the array of header fields
    def each(&block)
      to_a.each(&block)
    end

    # Get an array of parsed header fields
    #
    # The header rows are processed in this order: empty strings become
    # `nil`, rows are padded to equal length, fields are normalized (if
    # enabled), blank cells are expanded from the left (for the configured
    # rows), rows are combined column-wise, and finally duplicates are made
    # unique (if enabled).
    #
    # The header fields are cached, so consecutive calls to this method return
    # the same array.
    #
    # @return [Array] The array of header fields
    def to_a
      @to_a ||= begin
        headers = nil_empty(@headers)
        headers = square(headers)
        headers = normalize(headers) if @config.normalize_headers
        headers = expand(headers)
        combined = combine(headers)
        combined = unique(combined) if @config.unique_headers
        combined
      end
    end

    class << self
      # Parse headers from a CSV
      #
      # Reads rows from the CSV up to and including the last header row. If
      # the CSV ends before a configured header row is reached, that header
      # row is treated as empty instead of being passed on as `nil`.
      #
      # @param csv [CSV] A
      #   {http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html CSV} object.
      # @param config [Config] Configuration options. Default options if `nil`.
      # @return [OpenStruct] An object with the methods `header`, `index`, and
      #   `rows`. `header` is the {Header} object. `index` is the next CSV row
      #   index. `rows` is an array of the skipped rows including the header
      #   rows.
      def parse!(csv, config = nil)
        config ||= Config.new
        index = config.header_rows.empty? ? 0 : (config.header_rows.max + 1)
        rows = read_rows(csv, index)
        # `csv.shift` yields nil past the end of the data; substitute an
        # empty row so header processing never receives nil.
        headers = config.header_rows.map { |i| rows[i] || [] }
        OpenStruct.new(
          header: new(headers, config),
          index: index,
          rows: rows
        )
      end

      private

      # Read a given number of rows from a CSV
      #
      # @param csv [CSV] A
      #   {http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html CSV} object to
      #   read rows from.
      # @param count [Integer] The number of rows to read
      # @return [Array<Array<String>, nil>] An array of the read rows; entries
      #   are whatever `csv.shift` returned for each read
      def read_rows(csv, count)
        (0...count).map { csv.shift }
      end
    end

    private

    # Combine multiple headers into a single header
    #
    # Joins individual headers with the `header_separator`. `nil` parts are
    # dropped; a column whose parts are all `nil` stays `nil`.
    #
    # @param headers [Array<Array<String>>] The headers to combine
    # @return [Array<String>] The combined header
    def combine(headers)
      headers.each_with_object([]) do |header, combined|
        header.each_with_index do |key, index|
          parts = [combined[index], key].compact
          combined[index] = parts.empty? ? nil : parts.join(@config.header_separator)
        end
      end
    end

    # Fills in `nil` headers from the left
    #
    # Only header rows whose position is listed in the config's
    # `expand_headers` are expanded; other rows are returned unchanged.
    #
    # @param headers [Array<Array<String>>] The headers to expand
    # @return [Array<Array<String>>] The expanded headers
    def expand(headers)
      headers.each_with_index.map do |header, index|
        next header unless @config.expand_headers.include?(index)

        last = nil
        # Carry the most recent non-nil key forward over blank cells.
        header.map { |key| last = key || last }
      end
    end

    # Unique headers by adding numbers to the end
    #
    # Headers that occur once are left untouched. Each occurrence of a
    # repeated header gets a zero-based counter appended, joined with the
    # `header_separator`.
    #
    # @param combined [Array<String>] The combined headers to unique
    # @return [Array<String>] The uniqued headers
    def unique(combined)
      combined = combined.dup
      collate(combined).each do |key, indexes|
        next if indexes.size == 1

        indexes.each_with_index do |index, count|
          combined[index] = [key, count].compact.join(@config.header_separator)
        end
      end
      combined
    end

    # Create a hash of headers to arrays of their indexes
    #
    # Used for checking for header uniqueness
    #
    # @param header [Array<String>] The combined header to collate
    # @return [Hash] The collated headers
    def collate(header)
      header.each_with_index.with_object({}) do |(key, index), collated|
        (collated[key] ||= []) << index
      end
    end

    # Normalize header values
    #
    # @param headers [Array<Array<String>>] The array of uncombined headers to
    #   normalize
    # @return [Array<Array<String>>] The normalized headers
    def normalize(headers)
      headers.map do |header|
        header.map { |key| @config.normalize_headers.normalize(key) }
      end
    end

    # Make the headers all the same length
    #
    # Shorter rows are padded with `nil`. New arrays are returned; the input
    # rows are not modified.
    #
    # @param headers [Array<Array<String>>] An array of headers to square
    # @return [Array<Array<String>>] The squared headers
    def square(headers)
      length = headers.map(&:size).max
      headers.map { |h| h + Array.new(length - h.size) }
    end

    # Convert header empty strings to nil
    #
    # @param headers [Array<Array<String>>] An array of headers to convert
    # @return [Array<Array<String>>] The converted headers
    def nil_empty(headers)
      headers.map { |h| h.map { |k| k == '' ? nil : k } }
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Uncsv
  # Normalizes strings into a consistent format
  class KeyNormalizer
    # The default values applied if an attribute's value is not specified when
    # constructing a new `KeyNormalizer` object.
    DEFAULTS = {
      downcase: true,
      separator: '_'
    }.freeze

    # A string to replace all non-alphanumeric characters in the key
    #
    # Default: '_'. Can be set to an empty string to remove non-alphanumeric
    # characters without replacing them.
    #
    # @return [String] The separator string
    attr_accessor :separator

    # Sets keys to all lower-case if set to `true`
    #
    # Default: true
    #
    # @return [Boolean] Whether the key will be lower-cased
    attr_accessor :downcase

    # Create a new `KeyNormalizer` object.
    #
    # Options will be set to the defaults unless overridden by the `opts`
    # parameter.
    #
    # @param opts [Hash] A hash of configuration options. See the individual
    #   attributes for detailed descriptions.
    def initialize(opts = {})
      DEFAULTS.merge(opts).each { |k, v| public_send("#{k}=", v) }
    end

    # Normalize a key
    #
    # Replaces each run of non-alphanumeric characters with `separator`,
    # then collapses repeated separators into one and trims a separator from
    # each end of the key. Then the key is lower-cased if `downcase` is set.
    # The input string is not modified.
    #
    # @param key [String, nil] The key field to normalize
    # @return [String, nil] The normalized header field or `nil` if the input
    #   key is `nil`.
    def normalize(key)
      return nil if key.nil?

      # Block form inserts the separator literally; a replacement string
      # would interpret backslash sequences such as `\1`.
      key = key.gsub(/[^a-z0-9]+/i) { separator }
      unless separator.empty?
        escaped_separator = Regexp.escape(separator)
        # Group the separator so the quantifier applies to the whole string,
        # not only to its final character.
        key.gsub!(/(?:#{escaped_separator}){2,}/) { separator }
        # \A and \z anchor to the whole string rather than to each line.
        key.gsub!(/\A#{escaped_separator}|#{escaped_separator}\z/, '')
      end
      key.downcase! if downcase
      key
    end
  end
end
|
data/lib/uncsv/row.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Uncsv
  # A single data row from a CSV. Fields can be accessed by header or zero-based
  # index.
  class Row
    include Enumerable

    # The headers for each field
    #
    # If a header for a given field is not defined, it will be `nil`.
    #
    # @return [Array] An array of the field headers
    attr_reader :header

    # The fields ordered from left to right
    #
    # An array of zero-indexed field values. If a field is empty it will be
    # `nil`, or `''` if `nil_empty` is `false`.
    #
    # @return [Array] An array of the field values
    attr_reader :fields

    # Create a new `Row` object
    #
    # The `header` and `fields` arrays do not need to be the same length. If
    # they are not, the missing values will be filled with `nil`. The arrays
    # passed in are copied, never modified, so a header array shared between
    # rows keeps its original length.
    #
    # @param header [Array] The field headers
    # @param fields [Array] The field values
    # @param config [Config] Configuration options. Default options if `nil`.
    def initialize(header, fields, config = nil)
      @config = config || Config.new
      width = [header.size, fields.size].max
      @header = square(header, width)
      @fields = square(fields, width).map { |f| process(f) }
      @map = Hash[@header.zip(@fields)]
    end

    # Get a field by index or header
    #
    # If `key` is an `Integer`, get a field by a zero-based index. If `key` is a
    # header, access a field by its header. If `key` is nil, will return
    # `nil`. If a field does not exist, the result is the empty value: `nil`,
    # or `''` if `nil_empty` is `false`.
    #
    # @param key [Integer, String] The index or header
    # @return [String, nil] The field value if it exists
    def [](key)
      return if key.nil?

      value = key.is_a?(Integer) ? @fields[key] : @map[key]
      process(value)
    end

    # Gets a hash of headers to fields
    #
    # `nil` headers will not be included in the hash.
    #
    # @return [Hash] A hash of headers to fields
    def to_h
      Hash[@header.compact.map { |h| [h, self[h]] }]
    end

    # Iterate over each pair of headers and fields
    #
    # @yield A block to run for each pair
    # @yieldparam header [String, nil] The field's header
    # @yieldparam field [String, nil] The field's value
    # @return [Enumerator] An enumerator over each pair if no block is given
    def each(&block)
      @map.each_pair(&block)
    end

    # Get a field by index or header and specify a default
    #
    # Tries to get the field specified by key (see {#[]}). If the field
    # is `nil`, returns the default. If a block is given, the default is the
    # block's return value, otherwise the default is the `default` argument.
    #
    # @param key [Integer, String] The index or header
    # @param default [Object] The value returned when the field is `nil` and
    #   no block is given
    # @yield A block to run if the field is `nil`
    # @yieldparam key [String] The `key` parameter
    # @return [String, Object] The field value or default
    def fetch(key, default = nil)
      value = self[key]
      return value unless value.nil?

      block_given? ? yield(key) : default
    end

    private

    # Pads an array with nil to extend it to the given size
    #
    # Always returns a new array; `array` itself is left untouched. Arrays
    # already at least `size` long are copied without truncation.
    #
    # @param array [Array] The array to square
    # @param size [Integer] The target array size
    # @return [Array] The squared array
    def square(array, size)
      array + Array.new([size - array.size, 0].max)
    end

    # Transforms a field value according to the config options
    #
    # With `nil_empty` enabled an empty string becomes `nil`; with it
    # disabled `nil` becomes an empty string.
    #
    # @param field [String, nil] The field value to process
    # @return [String, nil] The processed field
    def process(field)
      field = '' if field.nil? && !@config.nil_empty
      field = nil if field == '' && @config.nil_empty
      field
    end
  end
end
|
data/lib/uncsv/rows.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
class Uncsv
  # A collection of parsed rows from a CSV
  class Rows
    # Create a new `Rows` object
    #
    # @param csv [CSV] A
    #   {http://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html CSV} object.
    # @param config [Config] Configuration options. Default options if `nil`.
    def initialize(csv, config = nil)
      @csv = csv
      @config = config || Config.new
      @started = false
      @parsed = nil
    end

    # Iterate over each row
    #
    # Rows are read from the CSV as they are requested. Iterating again
    # rewinds the CSV and re-parses the header (see {#start}).
    #
    # @yield A block to run for each row
    # @yieldparam row [Row] A row object
    # @return [Enumerator] An enumerator over each row
    def each(&block)
      Enumerator.new do |yielder|
        start
        # Row numbering continues from the first row after the header.
        index = parsed.index
        loop do
          break unless yield_row(yielder, index)

          index += 1
        end
      end.each(&block)
    end

    # Get the CSV header
    #
    # @return [Array] An array of the CSV header fields
    # @see Header#to_a
    def header
      parsed.header.to_a
    end

    private

    # Whether the given row should be skipped
    #
    # A row is skipped when its index is listed in the config's `skip_rows`,
    # or when `skip_blanks` is set and every field is empty. Both `nil` and
    # the empty string count as empty, matching how `Row` and `Header` treat
    # empty cells.
    #
    # @param fields [Array] An array of field values
    # @param index [Integer] The zero-based row index
    # @return [Boolean] Whether the row should be skipped
    def should_skip?(fields, index)
      return true if @config.skip_rows[index]
      return true if @config.skip_blanks && fields.all? { |f| f.nil? || f == '' }

      false
    end

    # Yield a row from the CSV to the Enumerator yielder
    #
    # Reads a row from the CSV and yields a parsed row if necessary.
    #
    # @param yielder [Enumerator::Yielder] A yielder to yield the row to
    # @param index [Integer] The next row index
    # @return [Boolean] `false` if the CSV is ended
    def yield_row(yielder, index)
      fields = @csv.shift
      return false unless fields

      yielder << Row.new(header, fields, @config) unless should_skip?(fields, index)
      true
    end

    # Start reading the CSV
    #
    # If the CSV has already been read, it will be rewound and the header will
    # be reset.
    def start
      if @started
        @parsed = nil
        @csv.rewind
      else
        @started = true
      end
    end

    # Get the header parse object
    #
    # The parsed header is cached, so multiple calls will return the same
    # instance.
    #
    # @return [OpenStruct] The parsed header object
    def parsed
      @parsed ||= Header.parse!(@csv, @config)
    end
  end
end
|