roo-smarter_csv 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +32 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/.rubocop.yml +39 -0
- data/CHANGELOG.md +21 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/Gemfile +18 -0
- data/LICENSE.txt +21 -0
- data/README.md +311 -0
- data/Rakefile +24 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/lib/roo/smarter_csv/smarter_csv_adapter.rb +296 -0
- data/lib/roo/smarter_csv/version.rb +9 -0
- data/lib/roo-smarter_csv.rb +3 -0
- data/lib/roo_smarter_csv.rb +13 -0
- data/roo-smarter_csv.gemspec +41 -0
- data/sig/roo/smarter_csv.rbs +6 -0
- data/spec/fixtures/blank_fields.csv +4 -0
- data/spec/fixtures/clean_values.csv +2 -0
- data/spec/fixtures/defaults.csv +3 -0
- data/spec/fixtures/duplicate_blank_headers.csv +2 -0
- data/spec/fixtures/empty.csv +0 -0
- data/spec/fixtures/empty_rows.csv +4 -0
- data/spec/fixtures/quoted_headers.csv +2 -0
- data/spec/fixtures/sample.csv +4 -0
- data/spec/fixtures/sample.tsv +3 -0
- data/spec/fixtures/sample_bom.csv +3 -0
- data/spec/smarter_csv_spec.rb +263 -0
- data/spec/spec_helper.rb +23 -0
- metadata +179 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
require "date"
|
|
5
|
+
require_relative "version"
|
|
6
|
+
|
|
7
|
+
module Roo
|
|
8
|
+
# Roo CSV adapter backed by SmarterCSV while preserving Roo's sheet-style API.
|
|
9
|
+
class SmarterCSV
|
|
10
|
+
attr_reader :filename, :reader
|
|
11
|
+
|
|
12
|
+
# Keys from Roo's :csv_options that are copied into the SmarterCSV option hash.
COMPATIBLE_CSV_KEYS = [:col_sep, :row_sep, :quote_char, :encoding].freeze

# Baseline SmarterCSV options; values bridged from csv_options and the
# :smarter_csv option are merged on top of these.
# (collect_raw_lines: false is deliberately left disabled here.)
DEFAULT_SMARTER_CSV_OPTIONS = { remove_empty_hashes: false }.freeze
|
|
17
|
+
|
|
18
|
+
# Names of the available sheets. This adapter always reports exactly one
# sheet, named "default".
def sheets
  sheet_names = ["default"]
  sheet_names
end
|
|
21
|
+
|
|
22
|
+
# Returns the value at (row, col).
#
# When a header row is present, row 1 is answered from the parsed header
# cells. Any other row is looked up in the stored row hashes through the
# header key belonging to the column. The result is nil when the row does not
# exist or the column has no header key, and missing_cell_value when the row
# has no entry for that key.
def cell(row, col, sheet = nil)
  read_cells(sheet || default_sheet)
  row, col = normalize(row, col)
  return @header_row[col - 1] if header_row?(row)

  record = sparse_row_for(row)
  column_key = header_key_for(col) if record
  return nil unless column_key

  record.fetch(column_key) { missing_cell_value }
end
|
|
38
|
+
|
|
39
|
+
# Returns the type symbol for the cell at (row, col).
#
# Header cells (row 1 when a header row exists) yield nil when the header
# cell is nil, otherwise the inferred type of the header value. Data cells
# yield nil when the row or the column key is unknown, :empty when the row
# holds no entry for the key, and the inferred type of the stored value
# otherwise.
def celltype(row, col, sheet = nil)
  read_cells(sheet || default_sheet)
  row, col = normalize(row, col)

  if header_row?(row)
    heading = @header_row[col - 1]
    return (infer_type(heading) unless heading.nil?)
  end

  record = sparse_row_for(row)
  column_key = header_key_for(col) if record
  return nil unless column_key

  record.key?(column_key) ? infer_type(record[column_key]) : :empty
end
|
|
58
|
+
|
|
59
|
+
# Returns the cells of one row as an array.
#
# The header row is returned as a copy, padded with nil up to last_column(sheet)
# when it is shorter than that. A data row is returned with one entry per
# header key, using missing_cell_value where the row has no entry. An unknown
# row number yields an empty array.
def row(row_number, sheet = default_sheet)
  read_cells(sheet)

  if header_row?(row_number)
    padding = last_column(sheet) - @header_row.length
    return padding.positive? ? @header_row + Array.new(padding) : @header_row.dup
  end

  record = sparse_row_for(row_number)
  return [] unless record

  @header_keys.map do |column_key|
    record.fetch(column_key) { missing_cell_value }
  end
end
|
|
71
|
+
|
|
72
|
+
# The hash given under the :csv_options key of @options, or an empty hash
# when none was supplied.
def csv_options
  configured = @options[:csv_options]
  configured || {}
end
|
|
75
|
+
|
|
76
|
+
# Builds (and memoizes) the option hash handed to the SmarterCSV reader.
#
# Precedence, lowest to highest: DEFAULT_SMARTER_CSV_OPTIONS, the entries of
# csv_options whose symbolized key is listed in COMPATIBLE_CSV_KEYS, then the
# :smarter_csv option hash. A warning is emitted for every key supplied through
# both csv_options and :smarter_csv.
def smarter_csv_options
  return @smarter_csv_options if @smarter_csv_options

  bridged = csv_options
            .transform_keys(&:to_sym)
            .select { |name, _value| COMPATIBLE_CSV_KEYS.include?(name) }
  native = normalize_option_hash(@options[:smarter_csv])

  bridged.each_key do |name|
    next unless native.key?(name)

    warn "roo-smarter_csv: conflicting option #{name} found in csv_options and smarter_csv; using smarter_csv[:#{name}]"
  end

  @smarter_csv_options = DEFAULT_SMARTER_CSV_OPTIONS.merge(bridged).merge(native)
end
|
|
92
|
+
|
|
93
|
+
# Writes +value+ into the in-memory copy of the default sheet and returns it.
#
# The sheet argument is ignored; data is loaded first if the default sheet has
# not been read yet. Writing to the header row updates the header cells; any
# other row is created on demand and the value is stored under the column's
# header key. Header keys/cells are extended as needed and the sheet bounds are
# recomputed afterwards.
def set_value(row, col, value, _sheet)
  sheet = default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)

  writing_header = header_row?(row)
  ensure_data_row(row) unless writing_header
  ensure_header_width(col)

  if writing_header
    @header_row[col - 1] = value
  else
    @rows[data_row_index(row)][header_key_for(col)] = value
  end

  recalculate_bounds
  value
end
|
|
109
|
+
|
|
110
|
+
# No-op: accepts row, column, type and sheet but stores nothing.
# Always returns nil.
def set_type(_row, _col, _type, _sheet)
  # Intentionally empty.
  nil
end
|
|
113
|
+
|
|
114
|
+
alias filename_or_stream filename
|
|
115
|
+
|
|
116
|
+
private
|
|
117
|
+
|
|
118
|
+
# Loads the CSV source into memory once per sheet.
#
# Returns immediately when the sheet is already marked as read. Otherwise it
# resets the cached reader, header cells, header keys and rows, runs a
# SmarterCSV reader over the source (collecting every yielded row or chunk via
# store_rows), captures the header row and header keys from the reader, records
# the row/column counts, and finally marks the sheet as read.
def read_cells(sheet = default_sheet)
  target = sheet || default_sheet
  return if @cells_read[target]

  @rows = []
  @header_keys = []
  @header_row = []
  @reader = nil

  with_source do |input|
    @reader = ::SmarterCSV::Reader.new(input, smarter_csv_options)
    @reader.process { |chunk| store_rows(chunk) }

    @header_row = parsed_header_row(@reader)
    @header_keys = Array(@reader.headers).dup

    widest = [@header_row.length, @header_keys.length].max
    set_row_count(target, total_rows)
    set_column_count(target, widest)
  end

  @cells_read[target] = true
end
|
|
142
|
+
|
|
143
|
+
# Yields the object the CSV reader should consume.
#
# For a URI filename the file is fetched with download_uri into a directory
# created by Dir.mktmpdir (prefix Roo::TEMP_PREFIX, base directory ENV["ROO_TMP"])
# and the download result is yielded inside the mktmpdir block. Otherwise
# filename_or_stream is yielded directly.
def with_source
  return yield(filename_or_stream) unless uri?(filename)

  ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |scratch_dir|
    yield download_uri(filename, scratch_dir)
  end
end
|
|
152
|
+
|
|
153
|
+
# Splits the reader's raw header line into individual header cells.
#
# Returns an empty array when the reader exposes no raw header. The line is
# parsed with the standard CSV library using the reader's :col_sep (falling
# back to ",") and :quote_char (defaulting to a double quote when the key is
# absent).
def parsed_header_row(reader)
  header_line = reader.raw_header
  return [] unless header_line

  separator = reader.options[:col_sep] || ","
  quote = reader.options.fetch(:quote_char, '"')
  fields = ::CSV.parse_line(header_line, col_sep: separator, quote_char: quote)

  Array(fields)
end
|
|
165
|
+
|
|
166
|
+
# Appends what the reader yielded to @rows: an Array is treated as a batch and
# every element is stored as its own row; anything else is stored as one row.
# Each stored row is first converted with sparse_row_hash.
def store_rows(row_data)
  return @rows << sparse_row_hash(row_data) unless row_data.is_a?(Array)

  row_data.each do |entry|
    @rows << sparse_row_hash(entry)
  end
end
|
|
174
|
+
|
|
175
|
+
# Converts one yielded row into a hash keyed by header key.
#
# * Hash  -> a shallow copy of it
# * Array -> values paired positionally with the reader's headers; positions
#            without a header get a generated key
# * nil   -> an empty hash
# * other -> a single-entry hash under the generated key for column 1
def sparse_row_hash(row_data)
  return {} if row_data.nil?
  return row_data.dup if row_data.is_a?(Hash)
  return { generated_header_key(1) => row_data } unless row_data.is_a?(Array)

  known_keys = Array(@reader&.headers)
  pairs = row_data.each_with_index.map do |value, position|
    [known_keys[position] || generated_header_key(position + 1), value]
  end
  pairs.to_h
end
|
|
191
|
+
|
|
192
|
+
# Maps a Ruby value to a cell type symbol.
#
#   nil                -> :empty
#   true / false       -> :boolean
#   Integer            -> :numeric
#   Float              -> :float
#   DateTime / Time    -> :datetime
#   Date               -> :date
#   String             -> :empty when empty, otherwise :string
#   anything else      -> :string
#
# @param value [Object] the cell value to classify
# @return [Symbol] the inferred type
def infer_type(value)
  case value
  when NilClass then :empty
  when TrueClass, FalseClass then :boolean
  when Integer then :numeric
  when Float then :float
  # DateTime is a subclass of Date, so it has to be matched before the Date
  # branch; otherwise DateTime values would be classified as :date.
  when DateTime, Time then :datetime
  when Date then :date
  when String then value.empty? ? :empty : :string
  else :string
  end
end
|
|
204
|
+
|
|
205
|
+
# Returns a copy of +hash+ with every key converted to a symbol.
# Anything that is not a Hash (including nil) yields an empty hash.
def normalize_option_hash(hash)
  hash.is_a?(Hash) ? hash.transform_keys(&:to_sym) : {}
end
|
|
212
|
+
|
|
213
|
+
# Records the row bounds for +sheet+: the first row is always 1 and the last
# row is +last_row+, except that a count of zero is stored as 1.
# Returns nil.
def set_row_count(sheet, last_row)
  @first_row[sheet] = 1
  @last_row[sheet] = last_row.zero? ? @first_row[sheet] : last_row
  nil
end
|
|
219
|
+
|
|
220
|
+
# Records the column bounds for +sheet+: the first column is always 1 and the
# last column is +last_col+, except that a count of zero is stored as 1.
# Returns nil.
def set_column_count(sheet, last_col)
  @first_column[sheet] = 1
  @last_column[sheet] = last_col.zero? ? @first_column[sheet] : last_col
  nil
end
|
|
226
|
+
|
|
227
|
+
# Sanitizes every String held for the sheet, in the header cells and in all
# stored rows, via sanitize_value; non-String values are left untouched.
# Afterwards the sheet is flagged in @cleaned.
def clean_sheet(sheet)
  read_cells(sheet)

  scrub = ->(value) { value.is_a?(String) ? sanitize_value(value) : value }
  @header_row = @header_row.map(&scrub)
  @rows.each { |record| record.transform_values!(&scrub) }

  @cleaned ||= {}
  @cleaned[sheet] = true
end
|
|
238
|
+
|
|
239
|
+
# Returns a copy of +value+ with control characters removed and with
# whitespace stripped at the start and end of each line.
def sanitize_value(value)
  unwanted = /[[:cntrl:]]|^\p{Space}+|\p{Space}+$/
  value.gsub(unwanted, "")
end
|
|
242
|
+
|
|
243
|
+
# Runs the constructor again with the filename and options this instance was
# created with, and returns whatever the constructor returns.
def reinitialize
  source = @filename
  settings = @options
  initialize(source, settings)
end
|
|
246
|
+
|
|
247
|
+
# True when +row+ is row 1 and a header row is present.
# Note: presence is tested with Array#any?, so a header row consisting only of
# nil cells counts as absent.
def header_row?(row)
  return false unless @header_row.any?

  row == 1
end
|
|
250
|
+
|
|
251
|
+
# Number of rows in the sheet: the stored data rows plus one for the header
# row when a header row is present (at least one non-nil header cell).
def total_rows
  header_lines = @header_row.any? ? 1 : 0
  header_lines + @rows.length
end
|
|
254
|
+
|
|
255
|
+
# Returns the stored hash for sheet row +row+, or nil when the row falls
# outside the stored data rows (negative or past-the-end index).
def sparse_row_for(row)
  offset = data_row_index(row)
  @rows[offset] if offset >= 0 && offset < @rows.length
end
|
|
261
|
+
|
|
262
|
+
# Translates a 1-based sheet row number into a 0-based index into @rows.
# With a header row present, sheet row 2 is index 0; without one, sheet row 1
# is index 0.
def data_row_index(row)
  first_data_row = @header_row.any? ? 2 : 1
  row - first_data_row
end
|
|
265
|
+
|
|
266
|
+
# Returns the header key for the 1-based column number +col+, or nil when no
# such column exists.
#
# Column numbers below 1 are rejected explicitly: without the guard, col - 1
# would be a negative index and Array#[] would wrap around to a key counted
# from the end, silently addressing the wrong column.
#
# @param col [Integer] 1-based column number
# @return [Symbol, nil] the header key, or nil for an unknown column
def header_key_for(col)
  return nil if col < 1

  @header_keys[col - 1]
end
|
|
269
|
+
|
|
270
|
+
# Placeholder reported for a column a row holds no entry for: always nil.
def missing_cell_value
  placeholder = nil
  placeholder
end
|
|
273
|
+
|
|
274
|
+
# Appends empty row hashes to @rows until the sheet row +row+ has a backing
# entry. Does nothing when the row already exists. Returns nil.
def ensure_data_row(row)
  shortfall = data_row_index(row) + 1 - @rows.length
  shortfall.times { @rows << {} }
  nil
end
|
|
277
|
+
|
|
278
|
+
# Grows the header bookkeeping so that column +col+ exists: missing header keys
# are filled with generated keys and missing header cells with nil.
# Existing entries are never changed. Returns nil.
def ensure_header_width(col)
  ((@header_keys.length + 1)..col).each do |position|
    @header_keys << generated_header_key(position)
  end

  padding = col - @header_row.length
  @header_row.concat(Array.new(padding)) if padding.positive?
  nil
end
|
|
282
|
+
|
|
283
|
+
# Builds the placeholder key used for a column without a header,
# e.g. :column_3 for column 3.
def generated_header_key(col)
  :"column_#{col}"
end
|
|
286
|
+
|
|
287
|
+
# Recomputes first/last row and column for the default sheet from the data held
# in memory. Both lower bounds are 1; the last row is total_rows and the last
# column is the larger of the header-cell and header-key counts, each stored as
# 1 when the count is zero.
def recalculate_bounds
  sheet = default_sheet
  row_total = total_rows
  widest = [@header_row.length, @header_keys.length].max

  @first_row[sheet] = 1
  @first_column[sheet] = 1
  @last_row[sheet] = row_total.zero? ? 1 : row_total
  @last_column[sheet] = widest
  @last_column[sheet] = 1 if widest.zero?
end
|
|
295
|
+
end
|
|
296
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "roo"
|
|
4
|
+
require "smarter_csv"
|
|
5
|
+
require_relative "roo/smarter_csv/smarter_csv_adapter"
|
|
6
|
+
|
|
7
|
+
# Roo namespace extensions for the SmarterCSV-backed CSV adapter.
|
|
8
|
+
module Roo
  # Point the :csv entry of Roo's extension-to-class table at the
  # SmarterCSV-backed adapter.
  CLASS_FOR_EXTENSION[:csv] = Roo::SmarterCSV
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
lib = File.expand_path("lib", __dir__)
|
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
|
+
require_relative "lib/roo/smarter_csv/version"
|
|
6
|
+
|
|
7
|
+
# Gem specification for roo-smarter_csv.
Gem::Specification.new do |spec|
  spec.name = "roo-smarter_csv"
  # spec.date is deliberately not assigned: RubyGems sets the date when the gem
  # is packaged, and stamping Time.now here made every build differ.
  spec.version = Roo::SmarterCSV::VERSION
  spec.authors = ["Tilo Sloboda"]
  spec.email = ["tilo.slobodal@gmail.com"]
  spec.summary = "High-performance CSV support for Roo using SmarterCSV"
  spec.description = "Extends Roo with SmarterCSV integration for robust and faster CSV parsing"

  spec.license = "MIT"

  spec.homepage = "https://github.com/tilo/roo-smarter_csv"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
  spec.metadata["documentation_uri"] = "#{spec.homepage}/tree/main/"
  spec.metadata["bug_tracker_uri"] = "#{spec.homepage}/issues"

  # Package every file tracked by git.
  spec.files = `git ls-files -z`.split("\x0")
  spec.require_paths = ["lib"]

  spec.required_ruby_version = ">= 2.6.0" # same as SmarterCSV

  # Dependencies
  spec.add_dependency "roo", ">= 2.0.0", "< 4" # roo 4.x is not out yet
  spec.add_dependency "smarter_csv", ">= 1.15.0" # it is recommended to use the latest version

  # Development dependencies
  spec.add_development_dependency "bundler", ">= 1.7"
  spec.add_development_dependency "matrix"
  spec.add_development_dependency "minitest", ">= 5.19.0"
  spec.add_development_dependency "rake", ">= 10.0"
  spec.add_development_dependency "rspec", ">= 3.0"
end
|
|
File without changes
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
RSpec.describe Roo::SmarterCSV do
|
|
6
|
+
let(:csv_path) { File.expand_path("fixtures/sample.csv", __dir__) }
|
|
7
|
+
let(:tsv_path) { File.expand_path("fixtures/sample.tsv", __dir__) }
|
|
8
|
+
let(:bom_path) { File.expand_path("fixtures/sample_bom.csv", __dir__) }
|
|
9
|
+
let(:empty_path) { File.expand_path("fixtures/empty.csv", __dir__) }
|
|
10
|
+
let(:defaults_path) { File.expand_path("fixtures/defaults.csv", __dir__) }
|
|
11
|
+
let(:blank_fields_path) { File.expand_path("fixtures/blank_fields.csv", __dir__) }
|
|
12
|
+
let(:empty_rows_path) { File.expand_path("fixtures/empty_rows.csv", __dir__) }
|
|
13
|
+
let(:duplicate_blank_headers_path) { File.expand_path("fixtures/duplicate_blank_headers.csv", __dir__) }
|
|
14
|
+
let(:quoted_headers_path) { File.expand_path("fixtures/quoted_headers.csv", __dir__) }
|
|
15
|
+
let(:clean_values_path) { File.expand_path("fixtures/clean_values.csv", __dir__) }
|
|
16
|
+
let(:csv) { Roo::SmarterCSV.new(csv_path) }
|
|
17
|
+
|
|
18
|
+
describe "Roo interface" do
|
|
19
|
+
it "returns sheets array" do
|
|
20
|
+
expect(csv.sheets).to eq(["default"])
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "provides default_sheet" do
|
|
24
|
+
expect(csv.default_sheet).to eq("default")
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it "accesses cells by row and column" do
|
|
28
|
+
expect(csv.cell(2, 1)).to eq("John")
|
|
29
|
+
expect(csv.cell(2, 2)).to eq(30)
|
|
30
|
+
expect(csv.cell(2, 3)).to eq("john@example.com")
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it "detects cell types" do
|
|
34
|
+
expect(csv.celltype(1, 1)).to eq(:string) # "Name"
|
|
35
|
+
expect(csv.celltype(2, 2)).to eq(:numeric) # 30
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it "provides row access" do
|
|
39
|
+
row = csv.row(2)
|
|
40
|
+
expect(row).to eq(["John", 30, "john@example.com", 50_000])
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it "provides boundary methods" do
|
|
44
|
+
expect(csv.first_row).to eq(1)
|
|
45
|
+
expect(csv.last_row).to eq(4) # 3 data rows + 1 header
|
|
46
|
+
expect(csv.first_column).to eq(1)
|
|
47
|
+
expect(csv.last_column).to eq(4)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it "iterates rows" do
|
|
51
|
+
rows = []
|
|
52
|
+
csv.each { |row| rows << row }
|
|
53
|
+
expect(rows.length).to eq(4)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
it "parses with headers" do
|
|
57
|
+
data = csv.parse(headers: true)
|
|
58
|
+
expect(data.first).to be_a(Hash)
|
|
59
|
+
expect(data.first.keys).to eq(%w[Name Age Email Salary])
|
|
60
|
+
expect(data.first["Name"]).to eq("Name")
|
|
61
|
+
expect(data[1]["Name"]).to eq("John")
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
describe "Type detection" do
|
|
66
|
+
it "detects numeric types" do
|
|
67
|
+
expect(csv.celltype(2, 2)).to eq(:numeric) # 30
|
|
68
|
+
expect(csv.cell(2, 2)).to eq(30)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it "detects string types" do
|
|
72
|
+
expect(csv.celltype(2, 1)).to eq(:string)
|
|
73
|
+
expect(csv.cell(2, 1)).to eq("John")
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
describe "Integration with Roo::Spreadsheet" do
|
|
78
|
+
it "registers itself as Roo CSV handler" do
|
|
79
|
+
expect(Roo::CLASS_FOR_EXTENSION[:csv]).to eq(Roo::SmarterCSV)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it "can be opened via Roo::Spreadsheet.open" do
|
|
83
|
+
spreadsheet = Roo::Spreadsheet.open(csv_path)
|
|
84
|
+
expect(spreadsheet).to be_a(Roo::SmarterCSV)
|
|
85
|
+
expect(spreadsheet.cell(2, 1)).to eq("John")
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
it "works with StringIO input" do
|
|
89
|
+
spreadsheet = Roo::Spreadsheet.open(StringIO.new(File.read(csv_path)), extension: :csv)
|
|
90
|
+
expect(spreadsheet).to be_a(Roo::SmarterCSV)
|
|
91
|
+
expect(spreadsheet.cell(2, 2)).to eq(30)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it "reads files with a UTF-8 BOM" do
|
|
95
|
+
spreadsheet = Roo::Spreadsheet.open(bom_path)
|
|
96
|
+
expect(spreadsheet.cell(2, 1)).to eq("John")
|
|
97
|
+
expect(spreadsheet.cell(2, 4)).to eq(50_000)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
it "treats an empty csv file as an error" do
|
|
101
|
+
expect { Roo::Spreadsheet.open(empty_path).cell(1, 1) }.to raise_error(SmarterCSV::EmptyFileError)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
it "accepts csv_options from Roo and bridges them into SmarterCSV" do
|
|
105
|
+
spreadsheet = Roo::Spreadsheet.open(tsv_path, extension: :csv, csv_options: { col_sep: "\t" })
|
|
106
|
+
expect(spreadsheet.cell(2, 1)).to eq("John")
|
|
107
|
+
expect(spreadsheet.cell(2, 4)).to eq(50_000)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it "uses SmarterCSV defaults when no options are supplied" do
|
|
111
|
+
spreadsheet = Roo::Spreadsheet.open(defaults_path)
|
|
112
|
+
expect(spreadsheet.cell(2, 2)).to eq("hello, world")
|
|
113
|
+
expect(spreadsheet.cell(2, 3)).to eq(30)
|
|
114
|
+
expect(spreadsheet.cell(2, 4)).to eq(1.5)
|
|
115
|
+
expect(spreadsheet.celltype(2, 3)).to eq(:numeric)
|
|
116
|
+
expect(spreadsheet.celltype(2, 4)).to eq(:float)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
it "preserves blank fields in the middle of rows" do
|
|
120
|
+
spreadsheet = Roo::Spreadsheet.open(blank_fields_path)
|
|
121
|
+
expect(spreadsheet.row(2)).to eq([1, "John", "Doe", nil, "Portland", "OR", 97_201])
|
|
122
|
+
expect(spreadsheet.row(3)).to eq([2, "Jane", "Smith", "Apt 3", "Seattle", "WA", nil])
|
|
123
|
+
expect(spreadsheet.row(4)).to eq([3, "Bob", "Jones", nil, nil, "CA", 90_210])
|
|
124
|
+
expect(spreadsheet.cell(2, 4)).to be_nil
|
|
125
|
+
expect(spreadsheet.celltype(2, 4)).to eq(:empty)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
it "preserves fully empty rows end-to-end" do
|
|
129
|
+
spreadsheet = Roo::Spreadsheet.open(empty_rows_path)
|
|
130
|
+
expect(spreadsheet.last_row).to eq(4)
|
|
131
|
+
expect(spreadsheet.row(3)).to eq([nil, nil, nil])
|
|
132
|
+
expect(spreadsheet.celltype(3, 1)).to eq(:empty)
|
|
133
|
+
expect(spreadsheet.row(4)).to eq([2, "Bob", 40])
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
it "supports SmarterCSV chunk_size without changing visible spreadsheet rows" do
|
|
137
|
+
spreadsheet = Roo::SmarterCSV.new(csv_path, smarter_csv: { chunk_size: 2 })
|
|
138
|
+
expect(spreadsheet.last_row).to eq(4)
|
|
139
|
+
expect(spreadsheet.row(2)).to eq(["John", 30, "john@example.com", 50_000])
|
|
140
|
+
expect(spreadsheet.row(4)).to eq(["Bob", 35, "bob@example.com", 55_000])
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
it "supports user_provided_headers when headers_in_file is false" do
|
|
144
|
+
spreadsheet = Roo::SmarterCSV.new(
|
|
145
|
+
StringIO.new("1,John\n2,Jane\n"),
|
|
146
|
+
smarter_csv: { headers_in_file: false, user_provided_headers: %i[id name] }
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
expect(spreadsheet.first_row).to eq(1)
|
|
150
|
+
expect(spreadsheet.last_row).to eq(2)
|
|
151
|
+
expect(spreadsheet.row(1)).to eq([1, "John"])
|
|
152
|
+
expect(spreadsheet.row(2)).to eq([2, "Jane"])
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
it "handles duplicate and blank headers without shifting data columns" do
|
|
156
|
+
spreadsheet = Roo::Spreadsheet.open(duplicate_blank_headers_path)
|
|
157
|
+
expect(spreadsheet.row(1)).to eq(["name", nil, "name"])
|
|
158
|
+
expect(spreadsheet.row(2)).to eq(["Alice", nil, "Bob"])
|
|
159
|
+
expect(spreadsheet.last_column).to eq(3)
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
it "parses quoted headers containing separators correctly" do
|
|
163
|
+
spreadsheet = Roo::Spreadsheet.open(quoted_headers_path)
|
|
164
|
+
expect(spreadsheet.row(1)).to eq(["Last, First", "Age", "City, State"])
|
|
165
|
+
expect(spreadsheet.row(2)).to eq(["Doe, Jane", 30, "Portland, OR"])
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
it "supports clean: true through Roo's public parse API" do
|
|
169
|
+
spreadsheet = Roo::Spreadsheet.open(clean_values_path)
|
|
170
|
+
data = spreadsheet.parse(headers: true, clean: true)
|
|
171
|
+
|
|
172
|
+
expect(data.first.keys).to eq(%w[Name City])
|
|
173
|
+
expect(data[1]).to eq({ "Name" => "John", "City" => "Portland" })
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
it "prefers smarter_csv options over csv_options and emits a warning" do
|
|
177
|
+
spreadsheet = Roo::SmarterCSV.new(csv_path, csv_options: { col_sep: ";" }, smarter_csv: { col_sep: "," })
|
|
178
|
+
expect { spreadsheet.cell(2, 1) }.to output(/conflicting option col_sep/).to_stderr
|
|
179
|
+
expect(spreadsheet.cell(2, 1)).to eq("John")
|
|
180
|
+
end
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
describe "private helper coverage" do
|
|
184
|
+
let(:adapter) { described_class.new(csv_path) }
|
|
185
|
+
|
|
186
|
+
it "covers private helper methods and mutation helpers" do
|
|
187
|
+
expect(adapter.send(:normalize_option_hash, nil)).to eq({})
|
|
188
|
+
expect(adapter.send(:normalize_option_hash, "nope")).to eq({})
|
|
189
|
+
expect(adapter.send(:normalize_option_hash, "a" => 1)).to eq({ a: 1 })
|
|
190
|
+
|
|
191
|
+
expect(adapter.send(:infer_type, nil)).to eq(:empty)
|
|
192
|
+
expect(adapter.send(:infer_type, true)).to eq(:boolean)
|
|
193
|
+
expect(adapter.send(:infer_type, false)).to eq(:boolean)
|
|
194
|
+
expect(adapter.send(:infer_type, Date.new(2024, 1, 1))).to eq(:date)
|
|
195
|
+
expect(adapter.send(:infer_type, Time.new(2024, 1, 1, 12, 0, 0))).to eq(:datetime)
|
|
196
|
+
expect(adapter.send(:infer_type, Object.new)).to eq(:string)
|
|
197
|
+
|
|
198
|
+
expect(adapter.send(:sanitize_value, " \tHello\n")).to eq("Hello")
|
|
199
|
+
expect(adapter.send(:generated_header_key, 3)).to eq(:column_3)
|
|
200
|
+
expect(adapter.send(:set_type, 1, 1, :string, nil)).to be_nil
|
|
201
|
+
|
|
202
|
+
adapter.instance_variable_set(:@reader, double(headers: %i[a b]))
|
|
203
|
+
expect(adapter.send(:sparse_row_hash, { a: 1 })).to eq({ a: 1 })
|
|
204
|
+
expect(adapter.send(:sparse_row_hash, %w[x y z])).to eq({ a: "x", b: "y", column_3: "z" })
|
|
205
|
+
expect(adapter.send(:sparse_row_hash, nil)).to eq({})
|
|
206
|
+
expect(adapter.send(:sparse_row_hash, "solo")).to eq({ column_1: "solo" })
|
|
207
|
+
|
|
208
|
+
adapter.instance_variable_set(:@rows, [])
|
|
209
|
+
adapter.send(:store_rows, { a: 1 })
|
|
210
|
+
adapter.send(:store_rows, [{ a: 2 }, nil])
|
|
211
|
+
expect(adapter.instance_variable_get(:@rows)).to eq([{ a: 1 }, { a: 2 }, {}])
|
|
212
|
+
|
|
213
|
+
adapter.instance_variable_set(:@header_row, ["H1"])
|
|
214
|
+
adapter.instance_variable_set(:@header_keys, [:h1])
|
|
215
|
+
adapter.instance_variable_set(:@rows, [])
|
|
216
|
+
adapter.send(:ensure_data_row, 3)
|
|
217
|
+
adapter.send(:ensure_header_width, 3)
|
|
218
|
+
expect(adapter.instance_variable_get(:@rows).length).to eq(2)
|
|
219
|
+
expect(adapter.instance_variable_get(:@header_keys)).to eq(%i[h1 column_2 column_3])
|
|
220
|
+
expect(adapter.instance_variable_get(:@header_row)).to eq(["H1", nil, nil])
|
|
221
|
+
|
|
222
|
+
adapter.instance_variable_set(:@cells_read, { "default" => true })
|
|
223
|
+
adapter.instance_variable_set(:@first_row, {})
|
|
224
|
+
adapter.instance_variable_set(:@last_row, {})
|
|
225
|
+
adapter.instance_variable_set(:@first_column, {})
|
|
226
|
+
adapter.instance_variable_set(:@last_column, {})
|
|
227
|
+
adapter.set_value(1, 2, "Header", nil)
|
|
228
|
+
adapter.set_value(2, 3, "Value", nil)
|
|
229
|
+
expect(adapter.row(1)).to eq(["H1", "Header", nil])
|
|
230
|
+
expect(adapter.row(2)).to eq([nil, nil, "Value"])
|
|
231
|
+
expect(adapter.celltype(1, 3)).to be_nil
|
|
232
|
+
expect(adapter.first_row).to eq(1)
|
|
233
|
+
expect(adapter.last_row).to eq(3)
|
|
234
|
+
expect(adapter.first_column).to eq(1)
|
|
235
|
+
expect(adapter.last_column).to eq(3)
|
|
236
|
+
|
|
237
|
+
adapter.instance_variable_set(:@header_row, [" Head\n", nil])
|
|
238
|
+
adapter.instance_variable_set(:@rows, [{ a: " row\t" }, { b: 1 }])
|
|
239
|
+
adapter.instance_variable_set(:@cells_read, { "default" => true })
|
|
240
|
+
adapter.send(:clean_sheet, "default")
|
|
241
|
+
expect(adapter.instance_variable_get(:@header_row)).to eq(["Head", nil])
|
|
242
|
+
expect(adapter.instance_variable_get(:@rows).first[:a]).to eq("row")
|
|
243
|
+
expect(adapter.instance_variable_get(:@cleaned)["default"]).to eq(true)
|
|
244
|
+
|
|
245
|
+
other = described_class.new(csv_path, csv_options: { col_sep: ";" })
|
|
246
|
+
expect(other.send(:reinitialize)).to eq(1)
|
|
247
|
+
expect(other).to be_a(described_class)
|
|
248
|
+
expect(other.csv_options).to eq(col_sep: ";")
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
it "uses the URI source path when reading remote files" do
|
|
252
|
+
remote = described_class.new("http://example.com/test.csv")
|
|
253
|
+
allow(remote).to receive(:uri?).and_return(true)
|
|
254
|
+
allow(remote).to receive(:download_uri).with("http://example.com/test.csv", "/tmp/roo-test").and_return("/tmp/downloaded.csv")
|
|
255
|
+
|
|
256
|
+
yielded = nil
|
|
257
|
+
allow(Dir).to receive(:mktmpdir).and_yield("/tmp/roo-test")
|
|
258
|
+
remote.send(:with_source) { |source| yielded = source }
|
|
259
|
+
|
|
260
|
+
expect(yielded).to eq("/tmp/downloaded.csv")
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
end
|