smarter_csv 1.15.2 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/CHANGELOG.md +68 -1
- data/CONTRIBUTORS.md +3 -1
- data/Gemfile +1 -0
- data/README.md +123 -27
- data/docs/_introduction.md +40 -24
- data/docs/bad_row_quarantine.md +285 -0
- data/docs/basic_read_api.md +151 -9
- data/docs/basic_write_api.md +474 -59
- data/docs/batch_processing.md +161 -4
- data/docs/column_selection.md +183 -0
- data/docs/data_transformations.md +162 -29
- data/docs/examples.md +339 -46
- data/docs/header_transformations.md +93 -12
- data/docs/header_validations.md +56 -18
- data/docs/history.md +117 -0
- data/docs/instrumentation.md +165 -0
- data/docs/migrating_from_csv.md +290 -0
- data/docs/options.md +150 -87
- data/docs/parsing_strategy.md +63 -1
- data/docs/real_world_csv.md +262 -0
- data/docs/releases/1.16.0/benchmarks.md +223 -0
- data/docs/releases/1.16.0/changes.md +272 -0
- data/docs/releases/1.16.0/performance_notes.md +114 -0
- data/docs/row_col_sep.md +14 -5
- data/docs/value_converters.md +193 -57
- data/ext/smarter_csv/extconf.rb +3 -0
- data/ext/smarter_csv/smarter_csv.c +1007 -71
- data/images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg +108 -0
- data/images/SmarterCSV_1.16.0_vs_previous_C-speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_previous_C-speedup.svg +141 -0
- data/images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.svg +139 -0
- data/lib/smarter_csv/errors.rb +8 -0
- data/lib/smarter_csv/file_io.rb +1 -1
- data/lib/smarter_csv/hash_transformations.rb +14 -13
- data/lib/smarter_csv/header_transformations.rb +21 -2
- data/lib/smarter_csv/headers.rb +2 -1
- data/lib/smarter_csv/options.rb +124 -7
- data/lib/smarter_csv/parser.rb +362 -75
- data/lib/smarter_csv/reader.rb +494 -46
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv/writer.rb +71 -19
- data/lib/smarter_csv.rb +95 -12
- data/smarter_csv.gemspec +20 -10
- metadata +37 -80
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv/writer.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'tempfile'
|
|
4
|
+
require 'stringio'
|
|
5
|
+
require 'set'
|
|
4
6
|
|
|
5
7
|
module SmarterCSV
|
|
6
8
|
#
|
|
@@ -33,6 +35,13 @@ module SmarterCSV
|
|
|
33
35
|
# force_quotes: defaults to false
|
|
34
36
|
# map_headers: defaults to {}, can be a hash of key -> value mappings
|
|
35
37
|
# value_converters: optional hash of key -> lambda to control serialization
|
|
38
|
+
# encoding: optional encoding string for the output file, e.g. 'UTF-8', 'ISO-8859-1'
|
|
39
|
+
# supports Ruby's 'external:internal' transcoding notation, e.g. 'ISO-8859-1:UTF-8'
|
|
40
|
+
# defaults to nil (system default). Only applies when writing to a file path.
|
|
41
|
+
# write_nil_value: string written in place of nil field values (default: '')
|
|
42
|
+
# write_empty_value: string written in place of empty-string field values (default: '')
|
|
43
|
+
# write_bom: when true, prepends a UTF-8 BOM (\xEF\xBB\xBF) to the output (default: false)
|
|
44
|
+
# Useful for Excel compatibility with non-ASCII content.
|
|
36
45
|
|
|
37
46
|
# IMPORTANT NOTES:
|
|
38
47
|
# * Data hashes could contain strings or symbols as keys.
|
|
@@ -42,18 +51,23 @@ module SmarterCSV
|
|
|
42
51
|
attr_reader :options, :row_sep, :col_sep, :quote_char, :force_quotes, :discover_headers, :headers, :map_headers, :output_file
|
|
43
52
|
|
|
44
53
|
class Writer
|
|
45
|
-
def initialize(
|
|
54
|
+
def initialize(file_path_or_io, options = {})
|
|
46
55
|
@options = options
|
|
47
56
|
|
|
48
57
|
@row_sep = options[:row_sep] || $/
|
|
49
58
|
@col_sep = options[:col_sep] || ','
|
|
50
59
|
@quote_char = options[:quote_char] || '"'
|
|
60
|
+
@escaped_quote_char = @quote_char * 2
|
|
51
61
|
@force_quotes = options[:force_quotes] == true
|
|
52
62
|
@quote_headers = options[:quote_headers] == true
|
|
53
63
|
@disable_auto_quoting = options[:disable_auto_quoting] == true
|
|
54
64
|
@value_converters = options[:value_converters] || {}
|
|
65
|
+
@encoding = options[:encoding]
|
|
66
|
+
@write_nil_value = options.fetch(:write_nil_value, '')
|
|
67
|
+
@write_empty_value = options.fetch(:write_empty_value, '')
|
|
68
|
+
@write_bom = options[:write_bom] == true
|
|
55
69
|
@map_all_keys = @value_converters.has_key?(:_all)
|
|
56
|
-
@mapped_keys = @value_converters.keys - [:_all]
|
|
70
|
+
@mapped_keys = Set.new(@value_converters.keys - [:_all])
|
|
57
71
|
@header_converter = options[:header_converter]
|
|
58
72
|
|
|
59
73
|
@discover_headers = true
|
|
@@ -68,9 +82,38 @@ module SmarterCSV
|
|
|
68
82
|
@headers = options[:map_headers].keys if options.has_key?(:map_headers) && !options.has_key?(:headers)
|
|
69
83
|
@map_headers = options[:map_headers] || {}
|
|
70
84
|
|
|
71
|
-
|
|
72
|
-
|
|
85
|
+
# Accept an IO-like object (StringIO, IO, etc.) or any path-like object (String, Pathname, etc.)
|
|
86
|
+
if file_path_or_io.respond_to?(:write)
|
|
87
|
+
# External IO handed in — we should not close it ourselves.
|
|
88
|
+
@output_file = file_path_or_io
|
|
89
|
+
@file_opened_by_us = false
|
|
90
|
+
else
|
|
91
|
+
path =
|
|
92
|
+
if file_path_or_io.respond_to?(:to_path)
|
|
93
|
+
file_path_or_io.to_path
|
|
94
|
+
elsif file_path_or_io.is_a?(String)
|
|
95
|
+
file_path_or_io
|
|
96
|
+
else
|
|
97
|
+
raise ArgumentError,
|
|
98
|
+
"SmarterCSV::Writer expects an IO-like object (responding to #write) " \
|
|
99
|
+
"or a path-like object (responding to #to_path or being a String), " \
|
|
100
|
+
"but got #{file_path_or_io.class}"
|
|
101
|
+
end
|
|
102
|
+
mode = @encoding ? "w+:#{@encoding}" : 'w+'
|
|
103
|
+
@output_file = File.open(path, mode)
|
|
104
|
+
@file_opened_by_us = true
|
|
105
|
+
end
|
|
73
106
|
@quote_regex = Regexp.union(@col_sep, @row_sep, @quote_char)
|
|
107
|
+
|
|
108
|
+
if !@discover_headers && !@headers.empty?
|
|
109
|
+
# Headers are fully known at construction time — write the header line immediately
|
|
110
|
+
# and stream data rows directly to @output_file, bypassing the temp file entirely.
|
|
111
|
+
@temp_file = nil
|
|
112
|
+
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
113
|
+
write_header_line
|
|
114
|
+
else
|
|
115
|
+
@temp_file = Tempfile.new('smarter_csv')
|
|
116
|
+
end
|
|
74
117
|
end
|
|
75
118
|
|
|
76
119
|
def <<(data)
|
|
@@ -82,30 +125,36 @@ module SmarterCSV
|
|
|
82
125
|
when NilClass
|
|
83
126
|
# ignore
|
|
84
127
|
else
|
|
85
|
-
# :nocov:
|
|
86
128
|
raise InvalidInputData, "Invalid data type: #{data.class}. Must be a Hash or an Array."
|
|
87
|
-
# :nocov:
|
|
88
129
|
end
|
|
89
130
|
end
|
|
90
131
|
|
|
91
132
|
def finalize
|
|
92
|
-
|
|
133
|
+
if @temp_file
|
|
134
|
+
# Header-discovery mode: headers were accumulated while writing rows;
|
|
135
|
+
# now prepend the header line and copy the buffered rows to the output.
|
|
136
|
+
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
137
|
+
write_header_line
|
|
138
|
+
@temp_file.rewind
|
|
139
|
+
@output_file.write(@temp_file.read)
|
|
140
|
+
@temp_file.close!
|
|
141
|
+
end
|
|
142
|
+
# In direct-write mode (@temp_file == nil) the header line and all data rows
|
|
143
|
+
# were already written to @output_file — nothing left to do but flush and close.
|
|
144
|
+
@output_file.flush
|
|
145
|
+
@output_file.close if @file_opened_by_us # only close files we opened; caller owns external IO objects
|
|
146
|
+
end
|
|
93
147
|
|
|
94
|
-
|
|
148
|
+
private
|
|
95
149
|
|
|
150
|
+
def write_header_line
|
|
151
|
+
mapped_headers = @headers.map { |header| @map_headers[header] || header }
|
|
152
|
+
mapped_headers = @headers.map { |header| @header_converter.call(header) } if @header_converter
|
|
96
153
|
force_quotes = @quote_headers || @force_quotes
|
|
97
154
|
mapped_headers = mapped_headers.map { |x| escape_csv_field(x, force_quotes) }
|
|
98
|
-
|
|
99
|
-
@temp_file.rewind
|
|
100
155
|
@output_file.write(mapped_headers.join(@col_sep) + @row_sep) unless mapped_headers.empty?
|
|
101
|
-
@output_file.write(@temp_file.read)
|
|
102
|
-
@output_file.flush
|
|
103
|
-
@output_file.close
|
|
104
|
-
@temp_file.delete
|
|
105
156
|
end
|
|
106
157
|
|
|
107
|
-
private
|
|
108
|
-
|
|
109
158
|
def process_hash(hash)
|
|
110
159
|
if @discover_headers
|
|
111
160
|
hash_keys = hash.keys
|
|
@@ -124,10 +173,13 @@ module SmarterCSV
|
|
|
124
173
|
# then apply general mapping rules
|
|
125
174
|
value = map_all_values(header, value) if @map_all_keys
|
|
126
175
|
|
|
176
|
+
value = @write_nil_value if value.nil?
|
|
177
|
+
value = @write_empty_value if !value.nil? && value.respond_to?(:empty?) && value.empty?
|
|
178
|
+
|
|
127
179
|
escape_csv_field(value, @force_quotes) # for backwards compatibility
|
|
128
180
|
end
|
|
129
181
|
|
|
130
|
-
@temp_file.write(ordered_row.join(@col_sep)
|
|
182
|
+
(@temp_file || @output_file).write(ordered_row.join(@col_sep) << @row_sep) unless ordered_row.empty?
|
|
131
183
|
end
|
|
132
184
|
|
|
133
185
|
def map_value(key, value)
|
|
@@ -143,9 +195,9 @@ module SmarterCSV
|
|
|
143
195
|
return str if @disable_auto_quoting && !force_quotes
|
|
144
196
|
|
|
145
197
|
# double-quote fields if we force that, or if the field contains the comma, new-line, or quote character
|
|
146
|
-
contains_special_char = str.
|
|
198
|
+
contains_special_char = str.match(@quote_regex)
|
|
147
199
|
if force_quotes || contains_special_char
|
|
148
|
-
str = str.gsub(@quote_char, @
|
|
200
|
+
str = str.gsub(@quote_char, @escaped_quote_char) if contains_special_char # escape double-quote
|
|
149
201
|
|
|
150
202
|
"\"#{str}\""
|
|
151
203
|
else
|
data/lib/smarter_csv.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'stringio'
|
|
3
4
|
require "smarter_csv/version"
|
|
4
5
|
require "smarter_csv/errors"
|
|
5
6
|
|
|
@@ -69,24 +70,106 @@ module SmarterCSV
|
|
|
69
70
|
reader.process(&block)
|
|
70
71
|
end
|
|
71
72
|
|
|
72
|
-
# Convenience method for
|
|
73
|
+
# Convenience method for parsing a CSV string directly.
|
|
74
|
+
# Equivalent to SmarterCSV.process(StringIO.new(csv_string), options).
|
|
73
75
|
#
|
|
74
|
-
#
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
#
|
|
76
|
+
# Example:
|
|
77
|
+
# data = SmarterCSV.parse("name,age\nAlice,30\nBob,25")
|
|
78
|
+
# # => [{name: "Alice", age: 30}, {name: "Bob", age: 25}]
|
|
79
|
+
#
|
|
80
|
+
# SmarterCSV.parse("name,age\nAlice,30") { |chunk| chunk.each { |h| puts h } }
|
|
81
|
+
#
|
|
82
|
+
def self.parse(csv_string, options = {}, &block)
|
|
83
|
+
process(StringIO.new(csv_string), options, &block)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Yields each successfully parsed row as a Hash (row-by-row, Enumerable-compatible).
|
|
87
|
+
# Returns an Enumerator when called without a block.
|
|
88
|
+
#
|
|
89
|
+
# Examples:
|
|
90
|
+
# SmarterCSV.each("data.csv") { |hash| MyModel.upsert(hash) }
|
|
91
|
+
# SmarterCSV.each("data.csv").select { |h| h[:country] == "US" }
|
|
92
|
+
# SmarterCSV.each("data.csv").lazy.map { |h| h[:name] }.first(10)
|
|
93
|
+
def self.each(input, options = {}, &block)
|
|
94
|
+
reader = Reader.new(input, options)
|
|
95
|
+
reader.each(&block)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Yields each chunk as Array<Hash> plus its 0-based chunk index.
|
|
99
|
+
# Requires chunk_size to be set in options (must be >= 1).
|
|
100
|
+
# Returns an Enumerator when called without a block.
|
|
101
|
+
#
|
|
102
|
+
# Examples:
|
|
103
|
+
# SmarterCSV.each_chunk("data.csv", chunk_size: 500) { |chunk, i| Sidekiq.push_bulk(chunk) }
|
|
104
|
+
# SmarterCSV.each_chunk("data.csv", chunk_size: 100).with_index { |chunk, i| ... }
|
|
105
|
+
def self.each_chunk(input, options = {}, &block)
|
|
106
|
+
reader = Reader.new(input, options)
|
|
107
|
+
reader.each_chunk(&block)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Convenience method for generating CSV files, IO objects, or in-memory strings.
|
|
111
|
+
#
|
|
112
|
+
# When called WITHOUT a first argument, generates CSV in memory and returns it as a String.
|
|
113
|
+
# When called WITH a file path (String/Pathname) or any IO-compatible object (StringIO,
|
|
114
|
+
# open File handle, etc.), writes to that destination; the return value is then whatever the block returns (not nil), so callers should not rely on it.
|
|
115
|
+
# The caller retains ownership of any IO object passed in — SmarterCSV will not close it.
|
|
116
|
+
#
|
|
117
|
+
# Examples:
|
|
118
|
+
#
|
|
119
|
+
# # Return CSV as a String (no file argument)
|
|
120
|
+
# csv_string = SmarterCSV.generate(options) do |csv|
|
|
121
|
+
# records.each { |r| csv << r }
|
|
122
|
+
# end
|
|
123
|
+
#
|
|
124
|
+
# # Write to a file by path
|
|
125
|
+
# SmarterCSV.generate('output.csv', options) do |csv|
|
|
126
|
+
# MyModel.find_in_batches(batch_size: 100) do |batch|
|
|
127
|
+
# batch.each { |record| csv << record.attributes }
|
|
128
|
+
# end
|
|
129
|
+
# end
|
|
130
|
+
#
|
|
131
|
+
# # Write to a StringIO (e.g. for Rails streaming responses)
|
|
132
|
+
# io = StringIO.new
|
|
133
|
+
# SmarterCSV.generate(io) do |csv|
|
|
134
|
+
# records.each { |r| csv << r }
|
|
135
|
+
# end
|
|
136
|
+
# send_data io.string, type: 'text/csv'
|
|
137
|
+
#
|
|
138
|
+
# # Write to an already-open file handle
|
|
139
|
+
# File.open('output.csv', 'w') do |f|
|
|
140
|
+
# SmarterCSV.generate(f) do |csv|
|
|
141
|
+
# records.each { |r| csv << r }
|
|
78
142
|
# end
|
|
79
143
|
# end
|
|
80
|
-
# end
|
|
81
144
|
#
|
|
82
145
|
# rubocop:disable Lint/UnusedMethodArgument
|
|
83
|
-
def self.generate(
|
|
84
|
-
raise unless block_given?
|
|
146
|
+
def self.generate(file_path_or_io = nil, options = {}, &block)
|
|
147
|
+
raise ArgumentError, "SmarterCSV.generate requires a block" unless block_given?
|
|
148
|
+
|
|
149
|
+
# When called as generate(options_hash) { }, the hash lands in file_path_or_io
|
|
150
|
+
if file_path_or_io.is_a?(Hash)
|
|
151
|
+
options = file_path_or_io
|
|
152
|
+
file_path_or_io = nil
|
|
153
|
+
end
|
|
85
154
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
155
|
+
if file_path_or_io.nil?
|
|
156
|
+
# No destination given — write to an in-memory StringIO and return the result as a String.
|
|
157
|
+
io = StringIO.new
|
|
158
|
+
writer = Writer.new(io, options)
|
|
159
|
+
begin
|
|
160
|
+
yield writer
|
|
161
|
+
ensure
|
|
162
|
+
writer&.finalize # must finalize before reading io.string
|
|
163
|
+
end
|
|
164
|
+
io.string
|
|
165
|
+
else
|
|
166
|
+
writer = Writer.new(file_path_or_io, options)
|
|
167
|
+
begin
|
|
168
|
+
yield writer
|
|
169
|
+
ensure
|
|
170
|
+
writer&.finalize
|
|
171
|
+
end
|
|
172
|
+
end
|
|
90
173
|
end
|
|
91
174
|
# rubocop:enable Lint/UnusedMethodArgument
|
|
92
175
|
end
|
data/smarter_csv.gemspec
CHANGED
|
@@ -10,16 +10,33 @@ Gem::Specification.new do |spec|
|
|
|
10
10
|
spec.version = SmarterCSV::VERSION
|
|
11
11
|
spec.date = Time.now.utc.strftime('%Y-%m-%d')
|
|
12
12
|
|
|
13
|
-
spec.summary
|
|
14
|
-
spec.description
|
|
13
|
+
spec.summary = "Fastest end-to-end CSV ingestion for Ruby with smart defaults and Rails-ready hash output"
|
|
14
|
+
spec.description = <<~DESC
|
|
15
|
+
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|
|
16
|
+
fastest end-to-end ingestion — not just parsing. It returns ready-to-use
|
|
17
|
+
hashes with configurable header and value transformations, intelligent
|
|
18
|
+
defaults, and automatic delimiter discovery.
|
|
19
|
+
|
|
20
|
+
Built for real-world data pipelines, SmarterCSV supports chunked processing
|
|
21
|
+
for large files, streaming via Enumerable APIs, and C acceleration
|
|
22
|
+
to optimize the full ingestion path (parsing + hash construction +
|
|
23
|
+
conversions).
|
|
24
|
+
|
|
25
|
+
Designed to handle messy user-uploaded CSV while remaining easy to integrate
|
|
26
|
+
with Rails, ActiveRecord imports, Sidekiq jobs, parallel processing, and
|
|
27
|
+
S3-based workflows.
|
|
28
|
+
DESC
|
|
29
|
+
|
|
15
30
|
spec.homepage = "https://github.com/tilo/smarter_csv"
|
|
16
31
|
spec.license = 'MIT'
|
|
17
32
|
|
|
18
33
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
19
34
|
spec.metadata["source_code_uri"] = spec.homepage
|
|
20
35
|
spec.metadata["changelog_uri"] = "https://github.com/tilo/smarter_csv/blob/main/CHANGELOG.md"
|
|
36
|
+
spec.metadata["documentation_uri"] = "https://github.com/tilo/smarter_csv/tree/main/docs"
|
|
37
|
+
spec.metadata["bug_tracker_uri"] = "https://github.com/tilo/smarter_csv/issues"
|
|
21
38
|
|
|
22
|
-
spec.required_ruby_version = ">= 2.
|
|
39
|
+
spec.required_ruby_version = ">= 2.6.0"
|
|
23
40
|
|
|
24
41
|
# Specify which files should be added to the gem when it is released.
|
|
25
42
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
@@ -29,16 +46,9 @@ Gem::Specification.new do |spec|
|
|
|
29
46
|
f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)}) || f.match(/\.h\z/)
|
|
30
47
|
end
|
|
31
48
|
end
|
|
32
|
-
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
|
33
|
-
|
|
34
49
|
spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
|
35
50
|
spec.require_paths = %w[lib ext]
|
|
36
51
|
spec.extensions = ["ext/smarter_csv/extconf.rb"]
|
|
37
52
|
spec.files += Dir.glob("ext/smarter_csv/**/*")
|
|
38
53
|
|
|
39
|
-
spec.add_development_dependency "awesome_print"
|
|
40
|
-
spec.add_development_dependency "pry"
|
|
41
|
-
spec.add_development_dependency "rspec"
|
|
42
|
-
spec.add_development_dependency "rubocop"
|
|
43
|
-
spec.add_development_dependency "simplecov"
|
|
44
54
|
end
|
metadata
CHANGED
|
@@ -1,89 +1,28 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: smarter_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.15.2
|
|
4
|
+
version: 1.16.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tilo Sloboda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
11
|
-
dependencies:
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
- !ruby/object:Gem::Dependency
|
|
27
|
-
name: pry
|
|
28
|
-
requirement: !ruby/object:Gem::Requirement
|
|
29
|
-
requirements:
|
|
30
|
-
- - ">="
|
|
31
|
-
- !ruby/object:Gem::Version
|
|
32
|
-
version: '0'
|
|
33
|
-
type: :development
|
|
34
|
-
prerelease: false
|
|
35
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
-
requirements:
|
|
37
|
-
- - ">="
|
|
38
|
-
- !ruby/object:Gem::Version
|
|
39
|
-
version: '0'
|
|
40
|
-
- !ruby/object:Gem::Dependency
|
|
41
|
-
name: rspec
|
|
42
|
-
requirement: !ruby/object:Gem::Requirement
|
|
43
|
-
requirements:
|
|
44
|
-
- - ">="
|
|
45
|
-
- !ruby/object:Gem::Version
|
|
46
|
-
version: '0'
|
|
47
|
-
type: :development
|
|
48
|
-
prerelease: false
|
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
-
requirements:
|
|
51
|
-
- - ">="
|
|
52
|
-
- !ruby/object:Gem::Version
|
|
53
|
-
version: '0'
|
|
54
|
-
- !ruby/object:Gem::Dependency
|
|
55
|
-
name: rubocop
|
|
56
|
-
requirement: !ruby/object:Gem::Requirement
|
|
57
|
-
requirements:
|
|
58
|
-
- - ">="
|
|
59
|
-
- !ruby/object:Gem::Version
|
|
60
|
-
version: '0'
|
|
61
|
-
type: :development
|
|
62
|
-
prerelease: false
|
|
63
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
-
requirements:
|
|
65
|
-
- - ">="
|
|
66
|
-
- !ruby/object:Gem::Version
|
|
67
|
-
version: '0'
|
|
68
|
-
- !ruby/object:Gem::Dependency
|
|
69
|
-
name: simplecov
|
|
70
|
-
requirement: !ruby/object:Gem::Requirement
|
|
71
|
-
requirements:
|
|
72
|
-
- - ">="
|
|
73
|
-
- !ruby/object:Gem::Version
|
|
74
|
-
version: '0'
|
|
75
|
-
type: :development
|
|
76
|
-
prerelease: false
|
|
77
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
-
requirements:
|
|
79
|
-
- - ">="
|
|
80
|
-
- !ruby/object:Gem::Version
|
|
81
|
-
version: '0'
|
|
82
|
-
description: Ruby Gem for convenient reading and writing of CSV files. It has intelligent
|
|
83
|
-
defaults, and auto-discovery of column and row separators. It imports CSV Files
|
|
84
|
-
as Array(s) of Hashes, suitable for direct processing with ActiveRecord, kicking-off
|
|
85
|
-
batch jobs with Sidekiq, parallel processing, or oploading data to S3. Similarly,
|
|
86
|
-
writing CSV files takes Hashes, or Arrays of Hashes to create a CSV file.
|
|
10
|
+
date: 2026-03-13 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: |
|
|
13
|
+
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|
|
14
|
+
fastest end-to-end ingestion — not just parsing. It returns ready-to-use
|
|
15
|
+
hashes with configurable header and value transformations, intelligent
|
|
16
|
+
defaults, and automatic delimiter discovery.
|
|
17
|
+
|
|
18
|
+
Built for real-world data pipelines, SmarterCSV supports chunked processing
|
|
19
|
+
for large files, streaming via Enumerable APIs, and C acceleration
|
|
20
|
+
to optimize the full ingestion path (parsing + hash construction +
|
|
21
|
+
conversions).
|
|
22
|
+
|
|
23
|
+
Designed to handle messy user-uploaded CSV while remaining easy to integrate
|
|
24
|
+
with Rails, ActiveRecord imports, Sidekiq jobs, parallel processing, and
|
|
25
|
+
S3-based workflows.
|
|
87
26
|
email:
|
|
88
27
|
- tilo.sloboda@gmail.com
|
|
89
28
|
executables: []
|
|
@@ -102,15 +41,24 @@ files:
|
|
|
102
41
|
- Rakefile
|
|
103
42
|
- TO_DO_v2.md
|
|
104
43
|
- docs/_introduction.md
|
|
44
|
+
- docs/bad_row_quarantine.md
|
|
105
45
|
- docs/basic_read_api.md
|
|
106
46
|
- docs/basic_write_api.md
|
|
107
47
|
- docs/batch_processing.md
|
|
48
|
+
- docs/column_selection.md
|
|
108
49
|
- docs/data_transformations.md
|
|
109
50
|
- docs/examples.md
|
|
110
51
|
- docs/header_transformations.md
|
|
111
52
|
- docs/header_validations.md
|
|
53
|
+
- docs/history.md
|
|
54
|
+
- docs/instrumentation.md
|
|
55
|
+
- docs/migrating_from_csv.md
|
|
112
56
|
- docs/options.md
|
|
113
57
|
- docs/parsing_strategy.md
|
|
58
|
+
- docs/real_world_csv.md
|
|
59
|
+
- docs/releases/1.16.0/benchmarks.md
|
|
60
|
+
- docs/releases/1.16.0/changes.md
|
|
61
|
+
- docs/releases/1.16.0/performance_notes.md
|
|
114
62
|
- docs/row_col_sep.md
|
|
115
63
|
- docs/value_converters.md
|
|
116
64
|
- ext/smarter_csv/Makefile
|
|
@@ -121,6 +69,12 @@ files:
|
|
|
121
69
|
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml
|
|
122
70
|
- ext/smarter_csv/smarter_csv.c
|
|
123
71
|
- ext/smarter_csv/smarter_csv.o
|
|
72
|
+
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png
|
|
73
|
+
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg
|
|
74
|
+
- images/SmarterCSV_1.16.0_vs_previous_C-speedup.png
|
|
75
|
+
- images/SmarterCSV_1.16.0_vs_previous_C-speedup.svg
|
|
76
|
+
- images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.png
|
|
77
|
+
- images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.svg
|
|
124
78
|
- lib/smarter_csv.rb
|
|
125
79
|
- lib/smarter_csv/auto_detection.rb
|
|
126
80
|
- lib/smarter_csv/errors.rb
|
|
@@ -142,6 +96,8 @@ metadata:
|
|
|
142
96
|
homepage_uri: https://github.com/tilo/smarter_csv
|
|
143
97
|
source_code_uri: https://github.com/tilo/smarter_csv
|
|
144
98
|
changelog_uri: https://github.com/tilo/smarter_csv/blob/main/CHANGELOG.md
|
|
99
|
+
documentation_uri: https://github.com/tilo/smarter_csv/tree/main/docs
|
|
100
|
+
bug_tracker_uri: https://github.com/tilo/smarter_csv/issues
|
|
145
101
|
rdoc_options: []
|
|
146
102
|
require_paths:
|
|
147
103
|
- lib
|
|
@@ -150,7 +106,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
150
106
|
requirements:
|
|
151
107
|
- - ">="
|
|
152
108
|
- !ruby/object:Gem::Version
|
|
153
|
-
version: 2.
|
|
109
|
+
version: 2.6.0
|
|
154
110
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
111
|
requirements:
|
|
156
112
|
- - ">="
|
|
@@ -159,5 +115,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
159
115
|
requirements: []
|
|
160
116
|
rubygems_version: 4.0.6
|
|
161
117
|
specification_version: 4
|
|
162
|
-
summary:
|
|
118
|
+
summary: Fastest end-to-end CSV ingestion for Ruby with smart defaults and Rails-ready
|
|
119
|
+
hash output
|
|
163
120
|
test_files: []
|