smarter_csv 1.17.4 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,10 +3,11 @@
3
3
  module SmarterCSV
4
4
  module HashTransformations
5
5
  # Frozen regex constants for performance (avoid recompilation on every value)
6
- NUMERIC_REGEX = /\A[+-]?\d+(?:\.\d+)?\z/.freeze
6
+ NUMERIC_REGEX = /\A[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\z/.freeze
7
7
  # FLOAT_REGEX = /\A[+-]?\d+\.\d+\z/.freeze
8
8
  # INTEGER_REGEX = /\A[+-]?\d+\z/.freeze
9
9
  ZERO_REGEX = /\A[+-]?0+(?:\.0+)?\z/.freeze # could be +0.0
10
+ EXPONENT_CHARS = %w[e E].freeze # mantissa scan stops here in significant_digits
10
11
 
11
12
  # First-byte values that can begin a numeric literal — used to skip the numeric
12
13
  # regexes for values that obviously aren't numbers (e.g. city names).
@@ -70,7 +71,13 @@ module SmarterCSV
70
71
  first_byte = v.getbyte(0)
71
72
  if first_byte && ((first_byte >= ZERO_BYTE && first_byte <= NINE_BYTE) || first_byte == MINUS_BYTE || first_byte == PLUS_BYTE)
72
73
  if NUMERIC_REGEX.match?(v)
73
- hash[k] = v.include?('.') ? v.to_f : v.to_i
74
+ # A value with a '.' or an exponent is a decimal → honor decimal_precision;
75
+ # otherwise it's an integer.
76
+ hash[k] = if v.include?('.') || v.include?('e') || v.include?('E')
77
+ convert_decimal(v, options[:decimal_precision])
78
+ else
79
+ v.to_i
80
+ end
74
81
  end
75
82
  end
76
83
  end
@@ -121,6 +128,48 @@ module SmarterCSV
121
128
 
122
129
  protected
123
130
 
131
+ # Convert a decimal string (has a '.' or an exponent) to a numeric, honoring
132
+ # decimal_precision: :float -> Float, :bigdecimal -> BigDecimal, :auto -> Float unless
133
+ # the value carries more than 16 significant digits (then BigDecimal, no precision loss).
134
+ def convert_decimal(str, decimal_precision)
135
+ case decimal_precision
136
+ when :float
137
+ str.to_f
138
+ when :bigdecimal
139
+ BigDecimal(str)
140
+ else # :auto
141
+ # A float token always has a '.' or 'e', so a token of <= 17 bytes holds at most
142
+ # 16 digits and therefore <= 16 significant digits — skip the per-char scan and go
143
+ # straight to Float (the common case: coordinates, sensor readings, prices). Only
144
+ # longer tokens can reach the BigDecimal threshold, so pay for the scan only then.
145
+ if str.bytesize > 17 && significant_digits(str) > 16
146
+ BigDecimal(str)
147
+ else
148
+ str.to_f
149
+ end
150
+ end
151
+ end
152
+
153
+ # Count significant mantissa digits (leading zeros excluded, trailing and fraction
154
+ # digits included, exponent excluded). Matches the C path's fj_sig_digits / Oj's dec_cnt
155
+ # so :auto picks Float vs BigDecimal identically on both paths.
156
+ def significant_digits(str)
157
+ cnt = 0
158
+ started = false
159
+ str.each_char do |c|
160
+ break if EXPONENT_CHARS.include?(c)
161
+ next unless c >= '0' && c <= '9'
162
+
163
+ if started
164
+ cnt += 1
165
+ elsif c != '0'
166
+ started = true
167
+ cnt = 1
168
+ end
169
+ end
170
+ cnt
171
+ end
172
+
124
173
  # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
125
174
  def limit_execution_for_only_or_except(options, option_name, key)
126
175
  if options[option_name].is_a?(Hash)
@@ -17,6 +17,7 @@ module SmarterCSV
17
17
  collect_raw_lines: true,
18
18
  comment_regexp: nil, # was: /\A#/,
19
19
  convert_values_to_numeric: true,
20
+ decimal_precision: :auto, # :auto (Float, but BigDecimal above 16 significant digits), :float, or :bigdecimal
20
21
  downcase_header: true,
21
22
  duplicate_header_suffix: '', # was: nil,
22
23
  field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
@@ -58,6 +59,12 @@ module SmarterCSV
58
59
  with_line_numbers: false,
59
60
  }.freeze
60
61
 
62
+ # Options whose canonical value is one of a fixed set of symbols. A string form
63
+ # (e.g. "backslash" from options round-tripped through JSON or YAML) is coerced to
64
+ # the matching symbol. Non-string values (a callable for on_bad_row, true/false for
65
+ # legacy verbose) pass through untouched.
66
+ SYMBOL_VALUE_OPTIONS = %i[quote_escaping quote_boundary missing_headers on_bad_row verbose decimal_precision].freeze
67
+
61
68
  # NOTE: this is not called when "parse" methods are tested by themselves
62
69
  def process_options(given_options = {})
63
70
  # Debug output before merge — check raw verbose value (true or :debug)
@@ -77,6 +84,10 @@ module SmarterCSV
77
84
 
78
85
  @options = DEFAULT_OPTIONS.dup.merge!(given_options)
79
86
 
87
+ # Symbol/string interchangeability: accept either form for every option whose
88
+ # value is a symbol or a string. Done once here, before any value is read below.
89
+ normalize_option_value_types!(@options)
90
+
80
91
  # Normalize verbose to a symbol — done once here, stored back into @options.
81
92
  # All subsequent checks are free symbol comparisons; no re-evaluation needed.
82
93
  # :quiet — suppress all warnings and notices (good for production)
@@ -194,6 +205,9 @@ module SmarterCSV
194
205
  unless %i[legacy standard].include?(options[:quote_boundary])
195
206
  errors << "invalid quote_boundary: must be :legacy or :standard"
196
207
  end
208
+ unless %i[auto float bigdecimal].include?(options[:decimal_precision])
209
+ errors << "invalid decimal_precision: must be :auto, :float, or :bigdecimal"
210
+ end
197
211
  arc = options[:auto_row_sep_chars]
198
212
  min_arc = SmarterCSV::AutoDetection::MIN_AUTO_ROW_SEP_CHARS
199
213
  max_arc = SmarterCSV::AutoDetection::MAX_AUTO_ROW_SEP_CHARS
@@ -268,6 +282,16 @@ module SmarterCSV
268
282
  raise SmarterCSV::ValidationError, errors.inspect if errors.any?
269
283
  end
270
284
 
285
+ # Accept either a symbol or a string for every option whose value is one or the
286
+ # other, so callers limited to strings (JSON/YAML) behave the same as those passing
287
+ # symbols, and vice versa. Validation of the resulting value happens later.
288
+ def normalize_option_value_types!(options)
289
+ SYMBOL_VALUE_OPTIONS.each do |key|
290
+ v = options[key]
291
+ options[key] = v.to_sym if v.is_a?(String)
292
+ end
293
+ end
294
+
271
295
  def option_valid?(str)
272
296
  return true if str.is_a?(Symbol) && str == :auto
273
297
  return true if str.is_a?(String) && !str.empty?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- VERSION = "1.17.4"
4
+ VERSION = "1.18.0"
5
5
  end
data/lib/smarter_csv.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'stringio'
4
+ require 'bigdecimal' # for decimal_precision: :auto / :bigdecimal
4
5
  require "smarter_csv/version"
5
6
  require "smarter_csv/errors"
6
7
 
data/smarter_csv.gemspec CHANGED
@@ -40,6 +40,9 @@ Gem::Specification.new do |spec|
40
40
 
41
41
  spec.required_ruby_version = ">= 2.6.0"
42
42
 
43
+ # bigdecimal is no longer a default gem on Ruby 3.4+; needed for decimal_precision: :auto / :bigdecimal
44
+ spec.add_dependency "bigdecimal"
45
+
43
46
  # Specify which files should be added to the gem when it is released.
44
47
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
45
48
  spec.files = Dir.chdir(__dir__) do
metadata CHANGED
@@ -1,14 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.17.4
4
+ version: 1.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-06-03 00:00:00.000000000 Z
11
- dependencies: []
10
+ date: 2026-06-19 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: bigdecimal
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
12
26
  description: |
13
27
  SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
14
28
  fastest end-to-end ingestion — not just parsing. It returns ready-to-use
@@ -71,6 +85,10 @@ files:
71
85
  - docs/warnings.md
72
86
  - ext/smarter_csv/extconf.rb
73
87
  - ext/smarter_csv/smarter_csv.c
88
+ - ext/smarter_csv/vendor/LICENSE-fast_float-MIT
89
+ - ext/smarter_csv/vendor/eisel_lemire.h
90
+ - ext/smarter_csv/vendor/eisel_lemire.md
91
+ - ext/smarter_csv/vendor/eisel_lemire_powers.h
74
92
  - images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png
75
93
  - images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg
76
94
  - images/SmarterCSV_1.16.0_vs_previous_C-speedup.png
@@ -122,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
140
  - !ruby/object:Gem::Version
123
141
  version: '0'
124
142
  requirements: []
125
- rubygems_version: 4.0.11
143
+ rubygems_version: 3.6.9
126
144
  specification_version: 4
127
145
  summary: Fastest end-to-end CSV ingestion for Ruby with smart defaults and Rails-ready
128
146
  hash output