vfcsv 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.tool-versions +1 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +65 -0
- data/LICENSE +21 -0
- data/README.md +268 -0
- data/Rakefile +37 -0
- data/bench/run_all_jit.sh +20 -0
- data/bench/vs_competitors.rb +253 -0
- data/bench/vs_stdlib.rb +137 -0
- data/ext/vfcsv_rust/Cargo.lock +289 -0
- data/ext/vfcsv_rust/Cargo.toml +27 -0
- data/ext/vfcsv_rust/extconf.rb +6 -0
- data/ext/vfcsv_rust/src/lib.rs +476 -0
- data/lib/vfcsv/row.rb +296 -0
- data/lib/vfcsv/table.rb +270 -0
- data/lib/vfcsv/version.rb +5 -0
- data/lib/vfcsv.rb +568 -0
- data/vfcsv.gemspec +43 -0
- metadata +149 -0
data/lib/vfcsv.rb
ADDED
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "vfcsv/version"
|
|
4
|
+
require "date"
|
|
5
|
+
|
|
6
|
+
# VFCSV - Very Fast CSV Parser
|
|
7
|
+
#
|
|
8
|
+
# Drop-in replacement for Ruby's CSV library with SIMD acceleration.
|
|
9
|
+
# Provides 2-20x faster parsing while maintaining full API compatibility.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic usage (drop-in replacement)
|
|
12
|
+
# # Instead of: require 'csv'
|
|
13
|
+
# require 'vfcsv'
|
|
14
|
+
#
|
|
15
|
+
# # Use VFCSV exactly like CSV
|
|
16
|
+
# VFCSV.parse("a,b,c\n1,2,3")
|
|
17
|
+
# VFCSV.read("data.csv", headers: true)
|
|
18
|
+
# VFCSV.foreach("data.csv") { |row| puts row }
|
|
19
|
+
#
|
|
20
|
+
class VFCSV
|
|
21
|
+
class MalformedCSVError < StandardError; end
|
|
22
|
+
|
|
23
|
+
# Built-in field converters, keyed by the same names Ruby's CSV uses.
#
# Each entry is a one-argument lambda. It tries its parsers in order and
# returns the first result that does not raise ArgumentError or TypeError;
# when every parser fails, the value is handed back untouched.
Converters = begin
  as_integer   = ->(text) { Integer(text, 10) } # base 10 only
  as_float     = ->(text) { Float(text) }
  as_date      = ->(text) { Date.parse(text) }
  as_date_time = ->(text) { DateTime.parse(text) }

  # Builds a converter out of an ordered list of parsers.
  try_in_order = lambda do |*parsers|
    lambda do |value|
      parsers.each do |parser|
        return parser.call(value)
      rescue ArgumentError, TypeError
        next
      end
      value
    end
  end

  {
    integer: try_in_order.call(as_integer),
    float: try_in_order.call(as_float),
    numeric: try_in_order.call(as_integer, as_float),
    date: try_in_order.call(as_date),
    date_time: try_in_order.call(as_date_time),
    # Numeric first, then date_time.
    all: try_in_order.call(as_integer, as_float, as_date_time)
  }.freeze
end
|
|
82
|
+
|
|
83
|
+
# Built-in header converters, keyed by the same names Ruby's CSV uses.
#
# Each entry is a one-argument lambda that receives a header and returns
# its converted form.
HeaderConverters = {
  # Lower-cases the header text.
  downcase: lambda { |name| name.downcase },
  # Produces a Symbol: UTF-8, lower-cased, whitespace runs turned into "_",
  # and every remaining non-word character removed.
  symbol: lambda do |name|
    lowered = name.encode(Encoding::UTF_8).downcase
    underscored = lowered.gsub(/\s+/, "_")
    underscored.gsub(/[^\w]/, "").to_sym
  end
}.freeze
|
|
94
|
+
|
|
95
|
+
# Option defaults, named after the options of Ruby's CSV. Callers' options
# are merged over this hash by parse, parse_line, generate, generate_line,
# Writer and Instance.
DEFAULT_OPTIONS = {
  # Delimiters. :auto for row_sep is written as "\n" by the generator.
  col_sep: ",",
  row_sep: :auto,
  quote_char: "\"",

  # Field handling.
  field_size_limit: nil,
  converters: nil,
  unconverted_fields: nil,

  # Header handling.
  headers: false,
  return_headers: false,
  header_converters: nil,

  # Row filtering.
  skip_blanks: false,
  skip_lines: nil,

  # Quoting behaviour when generating.
  force_quotes: false,
  liberal_parsing: false,
  quote_empty: true,

  # Replacement values for missing / empty fields.
  nil_value: nil,
  empty_value: ""
}.freeze
|
|
114
|
+
|
|
115
|
+
class << self
|
|
116
|
+
# Parse a CSV string into an array of arrays (or a Table if headers: true).
#
# Input lines matching +:skip_lines+ are removed from the text before it is
# handed to the parser. Filtering the text, instead of dropping parsed rows
# by position, keeps the option correct when +:skip_blanks+ also removes
# rows (positions of rows and input lines no longer have to line up).
#
# @param str [String] CSV data to parse (converted with #to_s)
# @param options [Hash] Parsing options, merged over DEFAULT_OPTIONS
# @option options [String] :col_sep Column separator (default: ",")
# @option options [String] :quote_char Quote character (default: '"')
# @option options [Boolean] :headers Treat first row as headers (default: false)
# @option options [Symbol, Array, Proc] :converters Value converters
# @option options [Symbol, Array, Proc] :header_converters Header converters
# @option options [Boolean] :skip_blanks Skip blank rows (default: false)
# @option options [Regexp] :skip_lines Skip input lines matching pattern
# @option options [Object] :nil_value Value substituted for empty fields (default: nil)
# @yield [row] Each row (an Array, or a Row when headers are in effect)
# @return [Array<Array<String>>] rows when headers are off or nothing was parsed
# @return [Table] when headers: true and at least one row was parsed
# @return [nil] when a block is given
#
# @example Parse simple CSV
#   VFCSV.parse("a,b,c\n1,2,3")
#   #=> [["a", "b", "c"], ["1", "2", "3"]]
#
# @example Parse with headers
#   VFCSV.parse("a,b,c\n1,2,3", headers: true)
#   #=> #<VFCSV::Table>
#
def parse(str, **options, &block)
  opts = DEFAULT_OPTIONS.merge(options)
  source = str.to_s

  # Drop skipped lines up front so later row filters cannot shift them.
  if (pattern = opts[:skip_lines])
    source = source.each_line.reject { |line| line.match?(pattern) }.join
  end

  rows = rust_ext.parse(source, opts[:col_sep].to_s, opts[:quote_char].to_s)

  # Post-process: empty strings become nil_value (matching Ruby CSV), and a
  # row holding a single empty field is treated as a blank row ([]).
  nil_value = opts[:nil_value]
  rows = rows.map do |row|
    if row.size == 1 && row[0].empty?
      []
    else
      row.map { |field| field.empty? ? nil_value : field }
    end
  end

  rows = rows.reject { |row| row.empty? || row.all?(&:nil?) } if opts[:skip_blanks]

  if opts[:headers] && !rows.empty?
    # First remaining row supplies the headers; the rest become Row objects.
    header_row = apply_header_converters(rows.shift, opts[:header_converters])
    table_rows = rows.map do |row|
      Row.new(header_row, apply_converters(row, opts[:converters]))
    end
    result = Table.new(table_rows, headers: header_row)
  else
    rows = rows.map { |row| apply_converters(row, opts[:converters]) } if opts[:converters]
    result = rows
  end

  return result unless block_given?

  result.each(&block)
  nil
end
|
|
202
|
+
|
|
203
|
+
# Parse a single CSV line.
#
# Only the first row produced by the parser is used. The row is normalized
# the same way parse normalizes rows: empty fields become +:nil_value+ and a
# row holding a single empty field becomes [].
#
# @param line [String] Single CSV line (converted with #to_s)
# @param options [Hash] Parsing options, merged over DEFAULT_OPTIONS
# @option options [String] :col_sep Column separator (default: ",")
# @option options [String] :quote_char Quote character (default: '"')
# @option options [Object] :nil_value Value substituted for empty fields (default: nil)
# @option options [Symbol, Array, Proc] :converters Value converters
# @return [Array] Fields from the line ([] when nothing was parsed)
#
# @example
#   VFCSV.parse_line("a,b,c")
#   #=> ["a", "b", "c"]
#
def parse_line(line, **options)
  opts = DEFAULT_OPTIONS.merge(options)
  rows = rust_ext.parse(line.to_s, opts[:col_sep].to_s, opts[:quote_char].to_s)
  row = rows.first || []

  # Keep parse_line and parse in agreement about empty fields / blank rows.
  nil_value = opts[:nil_value]
  row =
    if row.size == 1 && row.first.empty?
      []
    else
      row.map { |field| field.empty? ? nil_value : field }
    end

  opts[:converters] ? apply_converters(row, opts[:converters]) : row
end
|
|
224
|
+
|
|
225
|
+
# Read a whole CSV file and parse its contents.
#
# The file is loaded with File.read and the text is passed to parse.
#
# @param path [String] Path to CSV file
# @param options [Hash] Parsing options (same as parse)
# @return [Array<Array<String>>] or [Table] if headers: true
#
# @example
#   VFCSV.read("data.csv")
#   VFCSV.read("data.csv", headers: true)
#
def read(path, **options)
  contents = File.read(path)
  parse(contents, **options)
end
|
|
238
|
+
|
|
239
|
+
# Same as read: forwards the path and options unchanged.
#
# @param path [String] Path to CSV file
# @param options [Hash] Parsing options (same as parse)
# @return [Array<Array<String>>] or [Table] if headers: true
def readlines(path, **options)
  read(path, **options)
end
|
|
243
|
+
|
|
244
|
+
# Yield every row of a CSV file to the block.
#
# The file is read in full with File.read and then parsed; rows are yielded
# by parse.
#
# @param path [String] Path to CSV file
# @param mode [String] File open mode (ignored, for compatibility)
# @param options [Hash] Parsing options
# @yield [Array<String>] or [Row] Each row
# @return [Enumerator] if no block given
#
# @example
#   VFCSV.foreach("data.csv") { |row| puts row.inspect }
#   VFCSV.foreach("data.csv", headers: true) { |row| puts row["name"] }
#
def foreach(path, mode = "r", **options, &block)
  if block
    parse(File.read(path), **options, &block)
  else
    to_enum(__method__, path, mode, **options)
  end
end
|
|
261
|
+
|
|
262
|
+
# Build a CSV string by appending rows to a Generator.
#
# @param str [String, nil] Optional initial text; the Generator works on a
#   copy of it ("" when nil)
# @param options [Hash] Generation options, merged over DEFAULT_OPTIONS
# @yield [Generator] Generator that accepts rows via <<
# @return [String] Generated CSV
#
# @example
#   VFCSV.generate do |csv|
#     csv << ["a", "b", "c"]
#     csv << [1, 2, 3]
#   end
#   #=> "a,b,c\n1,2,3\n"
#
def generate(str = nil, **options)
  initial_text = str || ""
  builder = Generator.new(initial_text, DEFAULT_OPTIONS.merge(options))
  yield builder if block_given?
  builder.to_s
end
|
|
282
|
+
|
|
283
|
+
# Turn one row into a CSV line by delegating to Generator.generate_line.
#
# @param row [Array] Fields to generate
# @param options [Hash] Generation options, merged over DEFAULT_OPTIONS
# @return [String] CSV line
#
# @example
#   VFCSV.generate_line(["a", "b", "c"])
#   #=> "a,b,c\n"
#
def generate_line(row, **options)
  merged = DEFAULT_OPTIONS.merge(options)
  Generator.generate_line(row, **merged)
end
|
|
297
|
+
|
|
298
|
+
# Turn several rows into one CSV string, one generated line per row.
#
# @param rows [Array<Array>] Rows to generate
# @param options [Hash] Generation options (passed to generate)
# @return [String] CSV string
#
def generate_lines(rows, **options)
  generate(**options) do |csv|
    rows.each do |fields|
      csv << fields
    end
  end
end
|
|
309
|
+
|
|
310
|
+
# Read a CSV file with headers enabled.
#
# headers: true is only a default here; a :headers entry in options wins.
# NOTE(review): Ruby's CSV.table also defaults converters: :numeric and
# header_converters: :symbol; this method does not. Confirm whether that
# difference is intended.
#
# @param path [String] Path to CSV file
# @param options [Hash] Parsing options
# @return [Table] Table object
#
def table(path, **options)
  with_headers = { headers: true }.merge(options)
  read(path, **with_headers)
end
|
|
319
|
+
|
|
320
|
+
# Open a CSV file for reading or writing.
#
# A mode containing "w" or "a" selects writing: a Writer is created and
# either yielded (and closed afterwards, even if the block raises) or
# returned. Any other mode reads: with a block the rows are passed to it via
# foreach, without one the parsed file is returned via read.
#
# @param path [String] Path to CSV file
# @param mode [String] File open mode ("r", "w", "a", etc.)
# @param options [Hash] CSV options
# @yield [Writer] in write mode; each row in read mode
# @return [Object] Result of block, a Writer, or the parsed rows
#
def open(path, mode = "r", **options, &block)
  writing = mode.include?("w") || mode.include?("a")

  unless writing
    return foreach(path, mode, **options, &block) if block_given?

    return read(path, **options)
  end

  writer = Writer.new(path, mode, options)
  return writer unless block_given?

  begin
    yield writer
  ensure
    writer.close
  end
end
|
|
350
|
+
|
|
351
|
+
# Build a new Instance around the given data (for compatibility).
#
# @param data [String, IO] CSV data source
# @param options [Hash] CSV options
# @return [VFCSV::Instance]
#
def instance(data = nil, **options)
  Instance.new(data, **options)
end
|
|
360
|
+
|
|
361
|
+
# Parse CSV from input, optionally transform each row, and write the
# regenerated CSV to output (compatibility method).
#
# When a block is given, each row is replaced by the block's return value;
# rows for which the block returns nil or false are left out. The same
# options are used for both parsing and generating.
#
# @param input [#read, #to_s] Source; #read is used when available
# @param output [#write] Destination; skipped if it does not respond to #write
# @param options [Hash] CSV options
# @yield [row] Each parsed row
# @return [String] The generated CSV text
def filter(input = $stdin, output = $stdout, **options)
  text = input.respond_to?(:read) ? input.read : input.to_s
  parsed = parse(text, **options)
  transformed = block_given? ? parsed.map { |row| yield row } : parsed

  generated = generate(**options) do |csv|
    transformed.each do |row|
      next unless row

      csv << row
    end
  end

  output.write(generated) if output.respond_to?(:write)
  generated
end
|
|
383
|
+
|
|
384
|
+
# Report SIMD information as provided by the native extension's simd_info.
def simd_info
  extension = rust_ext
  extension.simd_info
end
|
|
388
|
+
|
|
389
|
+
private
|
|
390
|
+
|
|
391
|
+
# Returns the RustExt module, requiring the compiled extension the first
# time it is needed and caching the module afterwards.
def rust_ext
  return @rust_ext if @rust_ext

  require_relative "vfcsv/vfcsv_rust"
  @rust_ext = RustExt
end
|
|
397
|
+
|
|
398
|
+
# Run the configured field converters over every value of a row.
#
# Converters only ever receive Strings, as in Ruby's CSV: nil (or otherwise
# non-String) fields are passed through untouched, and the chain stops for a
# field as soon as one converter turns it into a non-String. This keeps
# custom converters from being called with nil and prevents a later
# converter from re-converting an already converted value
# (e.g. [:integer, :float] leaves "5" as the Integer 5).
#
# @param row [Array] Field values
# @param converters [Symbol, Proc, Array, nil] Converter specification
# @return [Array] The row itself when there is nothing to apply, otherwise a
#   new array of converted values
def apply_converters(row, converters)
  return row if converters.nil?

  converter_procs = normalize_converters(converters, Converters)
  return row if converter_procs.empty?

  row.map do |value|
    converter_procs.each do |converter|
      break unless value.is_a?(String)

      value = converter.call(value)
    end
    value
  end
end
|
|
410
|
+
|
|
411
|
+
# Run the configured header converters over every header.
#
# Only String headers are converted. parse replaces empty header cells with
# nil_value before calling this method, and the built-in converters call
# String methods on their argument, so non-String headers (such as nil) are
# passed through untouched instead of raising NoMethodError. The chain also
# stops for a header once a converter returns a non-String (e.g. a Symbol).
#
# @param headers [Array] Header values
# @param converters [Symbol, Proc, Array, nil] Converter specification
# @return [Array] The headers themselves when there is nothing to apply,
#   otherwise a new array of converted headers
def apply_header_converters(headers, converters)
  return headers if converters.nil?

  converter_procs = normalize_converters(converters, HeaderConverters)
  return headers if converter_procs.empty?

  headers.map do |header|
    converter_procs.each do |converter|
      break unless header.is_a?(String)

      header = converter.call(header)
    end
    header
  end
end
|
|
423
|
+
|
|
424
|
+
# Turn a converter specification into a flat array of callables.
#
# Accepted forms:
# * nil                -> no converters
# * Symbol or String   -> name looked up in builtin_hash (unknown names are
#                         ignored)
# * Array              -> each element normalized recursively, flattened
# * anything with #call (Proc, Method, ...) -> used as is
# Anything else is ignored.
#
# @param converters [nil, Symbol, String, Array, #call] Converter specification
# @param builtin_hash [Hash{Symbol => #call}] Named built-in converters
# @return [Array<#call>] Converters in the order they were specified
def normalize_converters(converters, builtin_hash)
  case converters
  when nil
    []
  when Symbol, String
    [builtin_hash[converters.to_sym]].compact
  when Array
    converters.flat_map { |converter| normalize_converters(converter, builtin_hash) }
  else
    # Procs land here too; Method objects and other callables are accepted
    # rather than silently discarded.
    converters.respond_to?(:call) ? [converters] : []
  end
end
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
# Generator for building CSV strings.
#
# Rows appended with << are formatted by Generator.generate_line and
# collected in an internal string, available through to_s.
class Generator
  # @param str [String] Initial text; a copy is used, the argument itself is
  #   not modified
  # @param options [Hash] :col_sep, :quote_char, :row_sep, :force_quotes and
  #   :quote_empty are read; :row_sep of :auto or nil means "\n"
  def initialize(str, options)
    @output = str.dup
    @col_sep = options[:col_sep] || ","
    @quote_char = options[:quote_char] || '"'
    @row_sep = options[:row_sep] == :auto ? "\n" : (options[:row_sep] || "\n")
    @force_quotes = options[:force_quotes] || false
    @quote_empty = options.fetch(:quote_empty, true)
  end

  # Append one row.
  #
  # @param row [#map] Fields of the row
  # @return [Generator] self, so calls can be chained
  def <<(row)
    @output << self.class.generate_line(row,
      col_sep: @col_sep,
      quote_char: @quote_char,
      row_sep: @row_sep,
      force_quotes: @force_quotes,
      quote_empty: @quote_empty
    )
    self
  end
  alias_method :add_row, :<<
  alias_method :puts, :<<

  # @return [String] Everything generated so far
  def to_s
    @output
  end

  # Format one row as a CSV line, row separator included.
  #
  # nil fields are written as empty, unquoted fields (as Ruby's CSV does), so
  # they stay distinguishable from empty strings, which are quoted while
  # :quote_empty is on. With :force_quotes every field is quoted, nil
  # included.
  #
  # @param row [#map] Fields; non-nil fields are converted with #to_s
  # @param options [Hash] :col_sep (","), :quote_char ('"'), :row_sep ("\n"
  #   when nil or :auto), :force_quotes (false), :quote_empty (true)
  # @return [String] The CSV line
  def self.generate_line(row, **options)
    col_sep = options[:col_sep] || ","
    quote_char = options[:quote_char] || '"'
    row_sep = options[:row_sep]
    row_sep = "\n" if row_sep.nil? || row_sep == :auto
    force_quotes = options[:force_quotes] || false
    quote_empty = options.fetch(:quote_empty, true)

    fields = row.map do |field|
      # A missing value is not the same as an empty string.
      next "" if field.nil? && !force_quotes

      field_str = field.to_s
      if force_quotes || needs_quoting?(field_str, col_sep, quote_char) || (quote_empty && field_str.empty?)
        quote_field(field_str, quote_char)
      else
        field_str
      end
    end

    fields.join(col_sep) + row_sep
  end

  # @return [Boolean] true when str contains the column separator, the quote
  #   character, or a CR / LF
  def self.needs_quoting?(str, col_sep, quote_char)
    str.include?(col_sep) || str.include?(quote_char) || str.include?("\n") || str.include?("\r")
  end

  # Wrap str in quote characters, doubling any quote characters inside it.
  #
  # @return [String] The quoted field
  def self.quote_field(str, quote_char)
    escaped = str.gsub(quote_char, quote_char + quote_char)
    "#{quote_char}#{escaped}#{quote_char}"
  end
end
|
|
497
|
+
|
|
498
|
+
# Writer for streaming CSV rows to a file.
#
# The file is opened on construction and stays open until close is called.
class Writer
  # @param path [String] File to open
  # @param mode [String] Mode passed to File.open
  # @param options [Hash] CSV options, merged over VFCSV::DEFAULT_OPTIONS;
  #   :col_sep, :quote_char, :row_sep, :force_quotes and :quote_empty are used
  def initialize(path, mode, options)
    @file = File.open(path, mode)
    @options = VFCSV::DEFAULT_OPTIONS.merge(options)
    @col_sep = @options[:col_sep] || ","
    @quote_char = @options[:quote_char] || '"'
    # Passed through as is; Generator.generate_line maps nil / :auto to "\n".
    @row_sep = @options[:row_sep]
    @force_quotes = @options[:force_quotes] || false
    @quote_empty = @options.fetch(:quote_empty, true)
  end

  # Format one row with Generator.generate_line and write it to the file,
  # honouring the configured row separator.
  #
  # @param row [#map] Fields of the row
  # @return [Writer] self, so calls can be chained
  def <<(row)
    @file.write(Generator.generate_line(row,
      col_sep: @col_sep,
      quote_char: @quote_char,
      row_sep: @row_sep,
      force_quotes: @force_quotes,
      quote_empty: @quote_empty
    ))
    self
  end
  alias_method :add_row, :<<
  alias_method :puts, :<<

  # Close the underlying file.
  def close
    @file.close
  end
end
|
|
525
|
+
|
|
526
|
+
# Instance wrapper for stateful CSV operations.
#
# Holds a data source and options; the data is parsed with VFCSV.parse the
# first time rows are needed and the result is reused afterwards.
class Instance
  include Enumerable

  # @param data [String, #read, nil] CSV text, or an object whose #read
  #   returns it
  # @param options [Hash] CSV options, merged over VFCSV::DEFAULT_OPTIONS
  def initialize(data = nil, **options)
    @data = data
    @options = VFCSV::DEFAULT_OPTIONS.merge(options)
    @rows = nil
  end

  # Iterate over the parsed rows.
  #
  # @yield [row] Each parsed row
  # @return [Enumerator] if no block given
  def each(&block)
    return to_enum(__method__) unless block

    read.each(&block)
  end

  # @return [Object] Whatever VFCSV.parse produced for the data
  def read
    @rows ||= begin
      text = @data.respond_to?(:read) ? @data.read : @data.to_s
      VFCSV.parse(text, **@options)
    end
  end

  # @return [Object, nil] The parse result's headers when it exposes
  #   #headers, nil otherwise
  def headers
    parsed = read
    parsed.headers if parsed.respond_to?(:headers)
  end
end
|
|
560
|
+
|
|
561
|
+
# Placeholder for the native extension module; its methods are defined in
# Rust. This file calls RustExt.parse and RustExt.simd_info once the
# compiled extension has been required.
module RustExt; end
|
|
564
|
+
end
|
|
565
|
+
|
|
566
|
+
# Load Row and Table classes
|
|
567
|
+
require_relative "vfcsv/row"
|
|
568
|
+
require_relative "vfcsv/table"
|
data/vfcsv.gemspec
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/vfcsv/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for vfcsv: metadata, packaged files, the native
# extension entry point and dependencies.
Gem::Specification.new do |spec|
  spec.name = "vfcsv"
  spec.version = VFCSV::VERSION
  spec.authors = ["Chris Hasinski"]
  spec.email = ["krzysztof.hasinski@gmail.com"]

  spec.summary = "VFCSV - Drop-in replacement for Ruby's CSV with SIMD acceleration"
  spec.description = "SIMD-accelerated CSV parser - drop-in replacement for Ruby's CSV library. " \
                     "Uses NEON on ARM64 and AVX2 on x86_64 for 2-6x faster parsing. " \
                     "Full API compatibility with CSV::Row, CSV::Table, converters, and all options."
  spec.homepage = "https://github.com/khasinski/vfcsv"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
  spec.metadata["rubygems_mfa_required"] = "true"

  # Package every file tracked by git except this gemspec and anything under
  # the listed development-only prefixes.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .github])
    end
  end

  spec.require_paths = ["lib"]
  spec.extensions = ["ext/vfcsv_rust/extconf.rb"]

  # Build dependencies. rb_sys is declared once, as a runtime dependency, so
  # it is not repeated in the development list below.
  spec.add_dependency "rb_sys", "~> 0.9"

  # Development dependencies
  spec.add_development_dependency "rake", "~> 13.0"
  spec.add_development_dependency "rake-compiler", "~> 1.2"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "benchmark-ips", "~> 2.0"
end
|