smarter_csv 1.15.2 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +2 -0
- data/.rubocop.yml +9 -0
- data/CHANGELOG.md +112 -1
- data/CONTRIBUTORS.md +4 -1
- data/Gemfile +1 -0
- data/README.md +129 -27
- data/docs/_introduction.md +45 -24
- data/docs/bad_row_quarantine.md +342 -0
- data/docs/basic_read_api.md +152 -9
- data/docs/basic_write_api.md +475 -59
- data/docs/batch_processing.md +162 -4
- data/docs/column_selection.md +184 -0
- data/docs/data_transformations.md +163 -29
- data/docs/examples.md +340 -46
- data/docs/header_transformations.md +94 -12
- data/docs/header_validations.md +57 -18
- data/docs/history.md +119 -0
- data/docs/instrumentation.md +166 -0
- data/docs/migrating_from_csv.md +565 -0
- data/docs/options.md +151 -87
- data/docs/parsing_strategy.md +64 -1
- data/docs/real_world_csv.md +263 -0
- data/docs/releases/1.16.0/benchmarks.md +223 -0
- data/docs/releases/1.16.0/changes.md +273 -0
- data/docs/releases/1.16.0/performance_notes.md +114 -0
- data/docs/row_col_sep.md +15 -5
- data/docs/ruby_csv_pitfalls.md +514 -0
- data/docs/value_converters.md +194 -57
- data/ext/smarter_csv/extconf.rb +3 -0
- data/ext/smarter_csv/smarter_csv.c +1017 -82
- data/images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg +108 -0
- data/images/SmarterCSV_1.16.0_vs_previous_C-speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_previous_C-speedup.svg +141 -0
- data/images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.svg +139 -0
- data/lib/smarter_csv/errors.rb +8 -0
- data/lib/smarter_csv/file_io.rb +1 -1
- data/lib/smarter_csv/hash_transformations.rb +14 -13
- data/lib/smarter_csv/header_transformations.rb +21 -2
- data/lib/smarter_csv/headers.rb +2 -1
- data/lib/smarter_csv/options.rb +124 -7
- data/lib/smarter_csv/parser.rb +358 -74
- data/lib/smarter_csv/reader.rb +494 -46
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv/writer.rb +71 -19
- data/lib/smarter_csv.rb +134 -13
- data/smarter_csv.gemspec +20 -10
- metadata +38 -80
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv/writer.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'tempfile'
|
|
4
|
+
require 'stringio'
|
|
5
|
+
require 'set'
|
|
4
6
|
|
|
5
7
|
module SmarterCSV
|
|
6
8
|
#
|
|
@@ -33,6 +35,13 @@ module SmarterCSV
|
|
|
33
35
|
# force_quotes: defaults to false
|
|
34
36
|
# map_headers: defaults to {}, can be a hash of key -> value mappings
|
|
35
37
|
# value_converters: optional hash of key -> lambda to control serialization
|
|
38
|
+
# encoding: optional encoding string for the output file, e.g. 'UTF-8', 'ISO-8859-1'
|
|
39
|
+
# supports Ruby's 'external:internal' transcoding notation, e.g. 'ISO-8859-1:UTF-8'
|
|
40
|
+
# defaults to nil (system default). Only applies when writing to a file path.
|
|
41
|
+
# write_nil_value: string written in place of nil field values (default: '')
|
|
42
|
+
# write_empty_value: string written in place of empty-string field values (default: '')
|
|
43
|
+
# write_bom: when true, prepends a UTF-8 BOM (\xEF\xBB\xBF) to the output (default: false)
|
|
44
|
+
# Useful for Excel compatibility with non-ASCII content.
|
|
36
45
|
|
|
37
46
|
# IMPORTANT NOTES:
|
|
38
47
|
# * Data hashes could contain strings or symbols as keys.
|
|
@@ -42,18 +51,23 @@ module SmarterCSV
|
|
|
42
51
|
attr_reader :options, :row_sep, :col_sep, :quote_char, :force_quotes, :discover_headers, :headers, :map_headers, :output_file
|
|
43
52
|
|
|
44
53
|
class Writer
|
|
45
|
-
def initialize(
|
|
54
|
+
def initialize(file_path_or_io, options = {})
|
|
46
55
|
@options = options
|
|
47
56
|
|
|
48
57
|
@row_sep = options[:row_sep] || $/
|
|
49
58
|
@col_sep = options[:col_sep] || ','
|
|
50
59
|
@quote_char = options[:quote_char] || '"'
|
|
60
|
+
@escaped_quote_char = @quote_char * 2
|
|
51
61
|
@force_quotes = options[:force_quotes] == true
|
|
52
62
|
@quote_headers = options[:quote_headers] == true
|
|
53
63
|
@disable_auto_quoting = options[:disable_auto_quoting] == true
|
|
54
64
|
@value_converters = options[:value_converters] || {}
|
|
65
|
+
@encoding = options[:encoding]
|
|
66
|
+
@write_nil_value = options.fetch(:write_nil_value, '')
|
|
67
|
+
@write_empty_value = options.fetch(:write_empty_value, '')
|
|
68
|
+
@write_bom = options[:write_bom] == true
|
|
55
69
|
@map_all_keys = @value_converters.has_key?(:_all)
|
|
56
|
-
@mapped_keys = @value_converters.keys - [:_all]
|
|
70
|
+
@mapped_keys = Set.new(@value_converters.keys - [:_all])
|
|
57
71
|
@header_converter = options[:header_converter]
|
|
58
72
|
|
|
59
73
|
@discover_headers = true
|
|
@@ -68,9 +82,38 @@ module SmarterCSV
|
|
|
68
82
|
@headers = options[:map_headers].keys if options.has_key?(:map_headers) && !options.has_key?(:headers)
|
|
69
83
|
@map_headers = options[:map_headers] || {}
|
|
70
84
|
|
|
71
|
-
|
|
72
|
-
|
|
85
|
+
# Accept an IO-like object (StringIO, IO, etc.) or any path-like object (String, Pathname, etc.)
|
|
86
|
+
if file_path_or_io.respond_to?(:write)
|
|
87
|
+
# External IO handed in — we should not close it ourselves.
|
|
88
|
+
@output_file = file_path_or_io
|
|
89
|
+
@file_opened_by_us = false
|
|
90
|
+
else
|
|
91
|
+
path =
|
|
92
|
+
if file_path_or_io.respond_to?(:to_path)
|
|
93
|
+
file_path_or_io.to_path
|
|
94
|
+
elsif file_path_or_io.is_a?(String)
|
|
95
|
+
file_path_or_io
|
|
96
|
+
else
|
|
97
|
+
raise ArgumentError,
|
|
98
|
+
"SmarterCSV::Writer expects an IO-like object (responding to #write) " \
|
|
99
|
+
"or a path-like object (responding to #to_path or being a String), " \
|
|
100
|
+
"but got #{file_path_or_io.class}"
|
|
101
|
+
end
|
|
102
|
+
mode = @encoding ? "w+:#{@encoding}" : 'w+'
|
|
103
|
+
@output_file = File.open(path, mode)
|
|
104
|
+
@file_opened_by_us = true
|
|
105
|
+
end
|
|
73
106
|
@quote_regex = Regexp.union(@col_sep, @row_sep, @quote_char)
|
|
107
|
+
|
|
108
|
+
if !@discover_headers && !@headers.empty?
|
|
109
|
+
# Headers are fully known at construction time — write the header line immediately
|
|
110
|
+
# and stream data rows directly to @output_file, bypassing the temp file entirely.
|
|
111
|
+
@temp_file = nil
|
|
112
|
+
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
113
|
+
write_header_line
|
|
114
|
+
else
|
|
115
|
+
@temp_file = Tempfile.new('smarter_csv')
|
|
116
|
+
end
|
|
74
117
|
end
|
|
75
118
|
|
|
76
119
|
def <<(data)
|
|
@@ -82,30 +125,36 @@ module SmarterCSV
|
|
|
82
125
|
when NilClass
|
|
83
126
|
# ignore
|
|
84
127
|
else
|
|
85
|
-
# :nocov:
|
|
86
128
|
raise InvalidInputData, "Invalid data type: #{data.class}. Must be a Hash or an Array."
|
|
87
|
-
# :nocov:
|
|
88
129
|
end
|
|
89
130
|
end
|
|
90
131
|
|
|
91
132
|
def finalize
|
|
92
|
-
|
|
133
|
+
if @temp_file
|
|
134
|
+
# Header-discovery mode: headers were accumulated while writing rows;
|
|
135
|
+
# now prepend the header line and copy the buffered rows to the output.
|
|
136
|
+
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
137
|
+
write_header_line
|
|
138
|
+
@temp_file.rewind
|
|
139
|
+
@output_file.write(@temp_file.read)
|
|
140
|
+
@temp_file.close!
|
|
141
|
+
end
|
|
142
|
+
# In direct-write mode (@temp_file == nil) the header line and all data rows
|
|
143
|
+
# were already written to @output_file — nothing left to do but flush and close.
|
|
144
|
+
@output_file.flush
|
|
145
|
+
@output_file.close if @file_opened_by_us # only close files we opened; caller owns external IO objects
|
|
146
|
+
end
|
|
93
147
|
|
|
94
|
-
|
|
148
|
+
private
|
|
95
149
|
|
|
150
|
+
def write_header_line
|
|
151
|
+
mapped_headers = @headers.map { |header| @map_headers[header] || header }
|
|
152
|
+
mapped_headers = @headers.map { |header| @header_converter.call(header) } if @header_converter
|
|
96
153
|
force_quotes = @quote_headers || @force_quotes
|
|
97
154
|
mapped_headers = mapped_headers.map { |x| escape_csv_field(x, force_quotes) }
|
|
98
|
-
|
|
99
|
-
@temp_file.rewind
|
|
100
155
|
@output_file.write(mapped_headers.join(@col_sep) + @row_sep) unless mapped_headers.empty?
|
|
101
|
-
@output_file.write(@temp_file.read)
|
|
102
|
-
@output_file.flush
|
|
103
|
-
@output_file.close
|
|
104
|
-
@temp_file.delete
|
|
105
156
|
end
|
|
106
157
|
|
|
107
|
-
private
|
|
108
|
-
|
|
109
158
|
def process_hash(hash)
|
|
110
159
|
if @discover_headers
|
|
111
160
|
hash_keys = hash.keys
|
|
@@ -124,10 +173,13 @@ module SmarterCSV
|
|
|
124
173
|
# then apply general mapping rules
|
|
125
174
|
value = map_all_values(header, value) if @map_all_keys
|
|
126
175
|
|
|
176
|
+
value = @write_nil_value if value.nil?
|
|
177
|
+
value = @write_empty_value if !value.nil? && value.respond_to?(:empty?) && value.empty?
|
|
178
|
+
|
|
127
179
|
escape_csv_field(value, @force_quotes) # for backwards compatibility
|
|
128
180
|
end
|
|
129
181
|
|
|
130
|
-
@temp_file.write(ordered_row.join(@col_sep)
|
|
182
|
+
(@temp_file || @output_file).write(ordered_row.join(@col_sep) << @row_sep) unless ordered_row.empty?
|
|
131
183
|
end
|
|
132
184
|
|
|
133
185
|
def map_value(key, value)
|
|
@@ -143,9 +195,9 @@ module SmarterCSV
|
|
|
143
195
|
return str if @disable_auto_quoting && !force_quotes
|
|
144
196
|
|
|
145
197
|
# double-quote fields if we force that, or if the field contains the comma, new-line, or quote character
|
|
146
|
-
contains_special_char = str.
|
|
198
|
+
contains_special_char = str.match(@quote_regex)
|
|
147
199
|
if force_quotes || contains_special_char
|
|
148
|
-
str = str.gsub(@quote_char, @
|
|
200
|
+
str = str.gsub(@quote_char, @escaped_quote_char) if contains_special_char # escape double-quote
|
|
149
201
|
|
|
150
202
|
"\"#{str}\""
|
|
151
203
|
else
|
data/lib/smarter_csv.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'stringio'
|
|
3
4
|
require "smarter_csv/version"
|
|
4
5
|
require "smarter_csv/errors"
|
|
5
6
|
|
|
@@ -64,29 +65,149 @@ module SmarterCSV
|
|
|
64
65
|
# reader = SmarterCSV::Reader.new(input, options)
|
|
65
66
|
# reader.process # with or without block
|
|
66
67
|
#
|
|
68
|
+
# After calling any of the class-level methods, errors from the last run are available via:
|
|
69
|
+
#
|
|
70
|
+
# SmarterCSV.errors # => { bad_row_count: 2, bad_rows: [...] }
|
|
71
|
+
#
|
|
72
|
+
# This exposes the same reader.errors hash without requiring access to the Reader instance.
|
|
73
|
+
# Errors are cleared at the start of each call and stored per-thread, so this is safe in
|
|
74
|
+
# multi-threaded environments (Puma, Sidekiq). Note: only the most recent call's errors
|
|
75
|
+
# are retained per thread.
|
|
76
|
+
#
|
|
67
77
|
def self.process(input, given_options = {}, &block)
|
|
78
|
+
Thread.current[:current_thread_recent_errors] = {}
|
|
68
79
|
reader = Reader.new(input, given_options)
|
|
69
|
-
reader.process(&block)
|
|
80
|
+
result = reader.process(&block)
|
|
81
|
+
Thread.current[:current_thread_recent_errors] = reader.errors
|
|
82
|
+
result
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Convenience method for parsing a CSV string directly.
|
|
86
|
+
# Equivalent to SmarterCSV.process(StringIO.new(csv_string), options).
|
|
87
|
+
# Errors from the run are available via SmarterCSV.errors after the call.
|
|
88
|
+
#
|
|
89
|
+
# Example:
|
|
90
|
+
# data = SmarterCSV.parse("name,age\nAlice,30\nBob,25")
|
|
91
|
+
# # => [{name: "Alice", age: 30}, {name: "Bob", age: 25}]
|
|
92
|
+
#
|
|
93
|
+
# SmarterCSV.parse("name,age\nAlice,30") { |chunk| chunk.each { |h| puts h } }
|
|
94
|
+
#
|
|
95
|
+
def self.parse(csv_string, options = {}, &block)
|
|
96
|
+
process(StringIO.new(csv_string), options, &block)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Yields each successfully parsed row as a Hash (row-by-row, Enumerable-compatible).
|
|
100
|
+
# Returns an Enumerator when called without a block.
|
|
101
|
+
# When called with a block, errors from the run are available via SmarterCSV.errors after the call.
|
|
102
|
+
# When called without a block (Enumerator form), use SmarterCSV::Reader directly for error access.
|
|
103
|
+
#
|
|
104
|
+
# Examples:
|
|
105
|
+
# SmarterCSV.each("data.csv") { |hash| MyModel.upsert(hash) }
|
|
106
|
+
# SmarterCSV.each("data.csv").select { |h| h[:country] == "US" }
|
|
107
|
+
# SmarterCSV.each("data.csv").lazy.map { |h| h[:name] }.first(10)
|
|
108
|
+
def self.each(input, options = {}, &block)
|
|
109
|
+
Thread.current[:current_thread_recent_errors] = {}
|
|
110
|
+
reader = Reader.new(input, options)
|
|
111
|
+
result = reader.each(&block)
|
|
112
|
+
Thread.current[:current_thread_recent_errors] = reader.errors
|
|
113
|
+
result
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Yields each chunk as Array<Hash> plus its 0-based chunk index.
|
|
117
|
+
# Requires chunk_size to be set in options (must be >= 1).
|
|
118
|
+
# Returns an Enumerator when called without a block.
|
|
119
|
+
# When called with a block, errors from the run are available via SmarterCSV.errors after the call.
|
|
120
|
+
# When called without a block (Enumerator form), use SmarterCSV::Reader directly for error access.
|
|
121
|
+
#
|
|
122
|
+
# Examples:
|
|
123
|
+
# SmarterCSV.each_chunk("data.csv", chunk_size: 500) { |chunk, i| Sidekiq.push_bulk(chunk) }
|
|
124
|
+
# SmarterCSV.each_chunk("data.csv", chunk_size: 100).with_index { |chunk, i| ... }
|
|
125
|
+
def self.each_chunk(input, options = {}, &block)
|
|
126
|
+
Thread.current[:current_thread_recent_errors] = {}
|
|
127
|
+
reader = Reader.new(input, options)
|
|
128
|
+
result = reader.each_chunk(&block)
|
|
129
|
+
Thread.current[:current_thread_recent_errors] = reader.errors
|
|
130
|
+
result
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Returns the errors from the most recent call to .process, .parse, .each, or .each_chunk
|
|
134
|
+
# on the current thread. Cleared at the start of each new call.
|
|
135
|
+
#
|
|
136
|
+
# Keys (when on_bad_row: :skip or :collect is used):
|
|
137
|
+
# :bad_row_count — total number of bad rows encountered
|
|
138
|
+
# :bad_rows — array of error records (only with on_bad_row: :collect)
|
|
139
|
+
#
|
|
140
|
+
# Example:
|
|
141
|
+
# SmarterCSV.process('data.csv', on_bad_row: :skip)
|
|
142
|
+
# puts SmarterCSV.errors[:bad_row_count]
|
|
143
|
+
#
|
|
144
|
+
def self.errors
|
|
145
|
+
Thread.current[:current_thread_recent_errors] || {}
|
|
70
146
|
end
|
|
71
147
|
|
|
72
|
-
# Convenience method for generating CSV files
|
|
148
|
+
# Convenience method for generating CSV files, IO objects, or in-memory strings.
|
|
149
|
+
#
|
|
150
|
+
# When called WITHOUT a first argument, generates CSV in memory and returns it as a String.
|
|
151
|
+
# When called WITH a file path (String/Pathname) or any IO-compatible object (StringIO,
|
|
152
|
+
# open File handle, etc.), writes to that destination and returns nil.
|
|
153
|
+
# The caller retains ownership of any IO object passed in — SmarterCSV will not close it.
|
|
154
|
+
#
|
|
155
|
+
# Examples:
|
|
156
|
+
#
|
|
157
|
+
# # Return CSV as a String (no file argument)
|
|
158
|
+
# csv_string = SmarterCSV.generate(options) do |csv|
|
|
159
|
+
# records.each { |r| csv << r }
|
|
160
|
+
# end
|
|
161
|
+
#
|
|
162
|
+
# # Write to a file by path
|
|
163
|
+
# SmarterCSV.generate('output.csv', options) do |csv|
|
|
164
|
+
# MyModel.find_in_batches(batch_size: 100) do |batch|
|
|
165
|
+
# batch.each { |record| csv << record.attributes }
|
|
166
|
+
# end
|
|
167
|
+
# end
|
|
168
|
+
#
|
|
169
|
+
# # Write to a StringIO (e.g. for Rails streaming responses)
|
|
170
|
+
# io = StringIO.new
|
|
171
|
+
# SmarterCSV.generate(io) do |csv|
|
|
172
|
+
# records.each { |r| csv << r }
|
|
173
|
+
# end
|
|
174
|
+
# send_data io.string, type: 'text/csv'
|
|
73
175
|
#
|
|
74
|
-
#
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
#
|
|
176
|
+
# # Write to an already-open file handle
|
|
177
|
+
# File.open('output.csv', 'w') do |f|
|
|
178
|
+
# SmarterCSV.generate(f) do |csv|
|
|
179
|
+
# records.each { |r| csv << r }
|
|
78
180
|
# end
|
|
79
181
|
# end
|
|
80
|
-
# end
|
|
81
182
|
#
|
|
82
183
|
# rubocop:disable Lint/UnusedMethodArgument
|
|
83
|
-
def self.generate(
|
|
84
|
-
raise unless block_given?
|
|
184
|
+
def self.generate(file_path_or_io = nil, options = {}, &block)
|
|
185
|
+
raise ArgumentError, "SmarterCSV.generate requires a block" unless block_given?
|
|
85
186
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
187
|
+
# When called as generate(options_hash) { }, the hash lands in file_path_or_io
|
|
188
|
+
if file_path_or_io.is_a?(Hash)
|
|
189
|
+
options = file_path_or_io
|
|
190
|
+
file_path_or_io = nil
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
if file_path_or_io.nil?
|
|
194
|
+
# No destination given — write to an in-memory StringIO and return the result as a String.
|
|
195
|
+
io = StringIO.new
|
|
196
|
+
writer = Writer.new(io, options)
|
|
197
|
+
begin
|
|
198
|
+
yield writer
|
|
199
|
+
ensure
|
|
200
|
+
writer&.finalize # must finalize before reading io.string
|
|
201
|
+
end
|
|
202
|
+
io.string
|
|
203
|
+
else
|
|
204
|
+
writer = Writer.new(file_path_or_io, options)
|
|
205
|
+
begin
|
|
206
|
+
yield writer
|
|
207
|
+
ensure
|
|
208
|
+
writer&.finalize
|
|
209
|
+
end
|
|
210
|
+
end
|
|
90
211
|
end
|
|
91
212
|
# rubocop:enable Lint/UnusedMethodArgument
|
|
92
213
|
end
|
data/smarter_csv.gemspec
CHANGED
|
@@ -10,16 +10,33 @@ Gem::Specification.new do |spec|
|
|
|
10
10
|
spec.version = SmarterCSV::VERSION
|
|
11
11
|
spec.date = Time.now.utc.strftime('%Y-%m-%d')
|
|
12
12
|
|
|
13
|
-
spec.summary
|
|
14
|
-
spec.description
|
|
13
|
+
spec.summary = "Fastest end-to-end CSV ingestion for Ruby with smart defaults and Rails-ready hash output"
|
|
14
|
+
spec.description = <<~DESC
|
|
15
|
+
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|
|
16
|
+
fastest end-to-end ingestion — not just parsing. It returns ready-to-use
|
|
17
|
+
hashes with configurable header and value transformations, intelligent
|
|
18
|
+
defaults, and automatic delimiter discovery.
|
|
19
|
+
|
|
20
|
+
Built for real-world data pipelines, SmarterCSV supports chunked processing
|
|
21
|
+
for large files, streaming via Enumerable APIs, and C acceleration
|
|
22
|
+
to optimize the full ingestion path (parsing + hash construction +
|
|
23
|
+
conversions).
|
|
24
|
+
|
|
25
|
+
Designed to handle messy user-uploaded CSV while remaining easy to integrate
|
|
26
|
+
with Rails, ActiveRecord imports, Sidekiq jobs, parallel processing, and
|
|
27
|
+
S3-based workflows.
|
|
28
|
+
DESC
|
|
29
|
+
|
|
15
30
|
spec.homepage = "https://github.com/tilo/smarter_csv"
|
|
16
31
|
spec.license = 'MIT'
|
|
17
32
|
|
|
18
33
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
19
34
|
spec.metadata["source_code_uri"] = spec.homepage
|
|
20
35
|
spec.metadata["changelog_uri"] = "https://github.com/tilo/smarter_csv/blob/main/CHANGELOG.md"
|
|
36
|
+
spec.metadata["documentation_uri"] = "https://github.com/tilo/smarter_csv/tree/main/docs"
|
|
37
|
+
spec.metadata["bug_tracker_uri"] = "https://github.com/tilo/smarter_csv/issues"
|
|
21
38
|
|
|
22
|
-
spec.required_ruby_version = ">= 2.
|
|
39
|
+
spec.required_ruby_version = ">= 2.6.0"
|
|
23
40
|
|
|
24
41
|
# Specify which files should be added to the gem when it is released.
|
|
25
42
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
@@ -29,16 +46,9 @@ Gem::Specification.new do |spec|
|
|
|
29
46
|
f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)}) || f.match(/\.h\z/)
|
|
30
47
|
end
|
|
31
48
|
end
|
|
32
|
-
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
|
33
|
-
|
|
34
49
|
spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
|
35
50
|
spec.require_paths = %w[lib ext]
|
|
36
51
|
spec.extensions = ["ext/smarter_csv/extconf.rb"]
|
|
37
52
|
spec.files += Dir.glob("ext/smarter_csv/**/*")
|
|
38
53
|
|
|
39
|
-
spec.add_development_dependency "awesome_print"
|
|
40
|
-
spec.add_development_dependency "pry"
|
|
41
|
-
spec.add_development_dependency "rspec"
|
|
42
|
-
spec.add_development_dependency "rubocop"
|
|
43
|
-
spec.add_development_dependency "simplecov"
|
|
44
54
|
end
|
metadata
CHANGED
|
@@ -1,89 +1,28 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: smarter_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.16.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tilo Sloboda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
11
|
-
dependencies:
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
- !ruby/object:Gem::Dependency
|
|
27
|
-
name: pry
|
|
28
|
-
requirement: !ruby/object:Gem::Requirement
|
|
29
|
-
requirements:
|
|
30
|
-
- - ">="
|
|
31
|
-
- !ruby/object:Gem::Version
|
|
32
|
-
version: '0'
|
|
33
|
-
type: :development
|
|
34
|
-
prerelease: false
|
|
35
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
-
requirements:
|
|
37
|
-
- - ">="
|
|
38
|
-
- !ruby/object:Gem::Version
|
|
39
|
-
version: '0'
|
|
40
|
-
- !ruby/object:Gem::Dependency
|
|
41
|
-
name: rspec
|
|
42
|
-
requirement: !ruby/object:Gem::Requirement
|
|
43
|
-
requirements:
|
|
44
|
-
- - ">="
|
|
45
|
-
- !ruby/object:Gem::Version
|
|
46
|
-
version: '0'
|
|
47
|
-
type: :development
|
|
48
|
-
prerelease: false
|
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
-
requirements:
|
|
51
|
-
- - ">="
|
|
52
|
-
- !ruby/object:Gem::Version
|
|
53
|
-
version: '0'
|
|
54
|
-
- !ruby/object:Gem::Dependency
|
|
55
|
-
name: rubocop
|
|
56
|
-
requirement: !ruby/object:Gem::Requirement
|
|
57
|
-
requirements:
|
|
58
|
-
- - ">="
|
|
59
|
-
- !ruby/object:Gem::Version
|
|
60
|
-
version: '0'
|
|
61
|
-
type: :development
|
|
62
|
-
prerelease: false
|
|
63
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
-
requirements:
|
|
65
|
-
- - ">="
|
|
66
|
-
- !ruby/object:Gem::Version
|
|
67
|
-
version: '0'
|
|
68
|
-
- !ruby/object:Gem::Dependency
|
|
69
|
-
name: simplecov
|
|
70
|
-
requirement: !ruby/object:Gem::Requirement
|
|
71
|
-
requirements:
|
|
72
|
-
- - ">="
|
|
73
|
-
- !ruby/object:Gem::Version
|
|
74
|
-
version: '0'
|
|
75
|
-
type: :development
|
|
76
|
-
prerelease: false
|
|
77
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
-
requirements:
|
|
79
|
-
- - ">="
|
|
80
|
-
- !ruby/object:Gem::Version
|
|
81
|
-
version: '0'
|
|
82
|
-
description: Ruby Gem for convenient reading and writing of CSV files. It has intelligent
|
|
83
|
-
defaults, and auto-discovery of column and row separators. It imports CSV Files
|
|
84
|
-
as Array(s) of Hashes, suitable for direct processing with ActiveRecord, kicking-off
|
|
85
|
-
batch jobs with Sidekiq, parallel processing, or oploading data to S3. Similarly,
|
|
86
|
-
writing CSV files takes Hashes, or Arrays of Hashes to create a CSV file.
|
|
10
|
+
date: 2026-03-16 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: |
|
|
13
|
+
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|
|
14
|
+
fastest end-to-end ingestion — not just parsing. It returns ready-to-use
|
|
15
|
+
hashes with configurable header and value transformations, intelligent
|
|
16
|
+
defaults, and automatic delimiter discovery.
|
|
17
|
+
|
|
18
|
+
Built for real-world data pipelines, SmarterCSV supports chunked processing
|
|
19
|
+
for large files, streaming via Enumerable APIs, and C acceleration
|
|
20
|
+
to optimize the full ingestion path (parsing + hash construction +
|
|
21
|
+
conversions).
|
|
22
|
+
|
|
23
|
+
Designed to handle messy user-uploaded CSV while remaining easy to integrate
|
|
24
|
+
with Rails, ActiveRecord imports, Sidekiq jobs, parallel processing, and
|
|
25
|
+
S3-based workflows.
|
|
87
26
|
email:
|
|
88
27
|
- tilo.sloboda@gmail.com
|
|
89
28
|
executables: []
|
|
@@ -102,16 +41,26 @@ files:
|
|
|
102
41
|
- Rakefile
|
|
103
42
|
- TO_DO_v2.md
|
|
104
43
|
- docs/_introduction.md
|
|
44
|
+
- docs/bad_row_quarantine.md
|
|
105
45
|
- docs/basic_read_api.md
|
|
106
46
|
- docs/basic_write_api.md
|
|
107
47
|
- docs/batch_processing.md
|
|
48
|
+
- docs/column_selection.md
|
|
108
49
|
- docs/data_transformations.md
|
|
109
50
|
- docs/examples.md
|
|
110
51
|
- docs/header_transformations.md
|
|
111
52
|
- docs/header_validations.md
|
|
53
|
+
- docs/history.md
|
|
54
|
+
- docs/instrumentation.md
|
|
55
|
+
- docs/migrating_from_csv.md
|
|
112
56
|
- docs/options.md
|
|
113
57
|
- docs/parsing_strategy.md
|
|
58
|
+
- docs/real_world_csv.md
|
|
59
|
+
- docs/releases/1.16.0/benchmarks.md
|
|
60
|
+
- docs/releases/1.16.0/changes.md
|
|
61
|
+
- docs/releases/1.16.0/performance_notes.md
|
|
114
62
|
- docs/row_col_sep.md
|
|
63
|
+
- docs/ruby_csv_pitfalls.md
|
|
115
64
|
- docs/value_converters.md
|
|
116
65
|
- ext/smarter_csv/Makefile
|
|
117
66
|
- ext/smarter_csv/extconf.rb
|
|
@@ -121,6 +70,12 @@ files:
|
|
|
121
70
|
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml
|
|
122
71
|
- ext/smarter_csv/smarter_csv.c
|
|
123
72
|
- ext/smarter_csv/smarter_csv.o
|
|
73
|
+
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png
|
|
74
|
+
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg
|
|
75
|
+
- images/SmarterCSV_1.16.0_vs_previous_C-speedup.png
|
|
76
|
+
- images/SmarterCSV_1.16.0_vs_previous_C-speedup.svg
|
|
77
|
+
- images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.png
|
|
78
|
+
- images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.svg
|
|
124
79
|
- lib/smarter_csv.rb
|
|
125
80
|
- lib/smarter_csv/auto_detection.rb
|
|
126
81
|
- lib/smarter_csv/errors.rb
|
|
@@ -142,6 +97,8 @@ metadata:
|
|
|
142
97
|
homepage_uri: https://github.com/tilo/smarter_csv
|
|
143
98
|
source_code_uri: https://github.com/tilo/smarter_csv
|
|
144
99
|
changelog_uri: https://github.com/tilo/smarter_csv/blob/main/CHANGELOG.md
|
|
100
|
+
documentation_uri: https://github.com/tilo/smarter_csv/tree/main/docs
|
|
101
|
+
bug_tracker_uri: https://github.com/tilo/smarter_csv/issues
|
|
145
102
|
rdoc_options: []
|
|
146
103
|
require_paths:
|
|
147
104
|
- lib
|
|
@@ -150,7 +107,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
150
107
|
requirements:
|
|
151
108
|
- - ">="
|
|
152
109
|
- !ruby/object:Gem::Version
|
|
153
|
-
version: 2.
|
|
110
|
+
version: 2.6.0
|
|
154
111
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
112
|
requirements:
|
|
156
113
|
- - ">="
|
|
@@ -159,5 +116,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
159
116
|
requirements: []
|
|
160
117
|
rubygems_version: 4.0.6
|
|
161
118
|
specification_version: 4
|
|
162
|
-
summary:
|
|
119
|
+
summary: Fastest end-to-end CSV ingestion for Ruby with smart defaults and Rails-ready
|
|
120
|
+
hash output
|
|
163
121
|
test_files: []
|