smarter_csv 1.9.0 → 1.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/smarter_csv.rb CHANGED
@@ -1,622 +1,38 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "extensions/hash"
4
- require_relative "smarter_csv/version"
3
+ require "core_ext/hash"
5
4
 
6
- require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
7
- # require 'smarter_csv.bundle' unless ENV['CI'] # local testing
5
+ require "smarter_csv/version"
6
+ require "smarter_csv/options_processing"
8
7
 
9
- module SmarterCSV
10
- class SmarterCSVException < StandardError; end
11
- class HeaderSizeMismatch < SmarterCSVException; end
12
- class IncorrectOption < SmarterCSVException; end
13
- class ValidationError < SmarterCSVException; end
14
- class DuplicateHeaders < SmarterCSVException; end
15
- class MissingKeys < SmarterCSVException; end # previously known as MissingHeaders
16
- class NoColSepDetected < SmarterCSVException; end
17
- class KeyMappingError < SmarterCSVException; end
18
-
19
- # first parameter: filename or input object which responds to readline method
20
- def SmarterCSV.process(input, options = {}, &block) # rubocop:disable Lint/UnusedMethodArgument
21
- options = default_options.merge(options)
22
- options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
23
- puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
24
- validate_options!(options)
25
-
26
- headerA = []
27
- result = []
28
- @file_line_count = 0
29
- @csv_line_count = 0
30
- has_rails = !!defined?(Rails)
31
- begin
32
- fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
33
-
34
- # auto-detect the row separator
35
- options[:row_sep] = guess_line_ending(fh, options) if options[:row_sep]&.to_sym == :auto
36
- # attempt to auto-detect column separator
37
- options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep]&.to_sym == :auto
38
-
39
- if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8'))
40
- puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
41
- end
42
-
43
- skip_lines(fh, options)
44
-
45
- headerA, header_size = process_headers(fh, options)
46
-
47
- # in case we use chunking.. we'll need to set it up..
48
- if !options[:chunk_size].nil? && options[:chunk_size].to_i > 0
49
- use_chunks = true
50
- chunk_size = options[:chunk_size].to_i
51
- chunk_count = 0
52
- chunk = []
53
- else
54
- use_chunks = false
55
- end
56
-
57
- # now on to processing all the rest of the lines in the CSV file:
58
- until fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
59
- line = readline_with_counts(fh, options)
60
-
61
- # replace invalid byte sequence in UTF-8 with question mark to avoid errors
62
- line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
63
-
64
- print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
65
-
66
- next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
67
-
68
- # cater for the quoted csv data containing the row separator carriage return character
69
- # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
70
- # by detecting the existence of an uneven number of quote characters
71
-
72
- multiline = count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil
73
- while count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil
74
- next_line = fh.readline(options[:row_sep])
75
- next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
76
- line += next_line
77
- @file_line_count += 1
78
- end
79
- print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
80
-
81
- line.chomp!(options[:row_sep])
82
-
83
- dataA, _data_size = parse(line, options, header_size)
84
-
85
- dataA.map!{|x| x.strip} if options[:strip_whitespace]
86
-
87
- # if all values are blank, then ignore this line
88
- next if options[:remove_empty_hashes] && (dataA.empty? || blank?(dataA))
89
-
90
- hash = Hash.zip(headerA, dataA) # from Facets of Ruby library
91
-
92
- # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
93
- # Note: Ruby < 1.9 doesn't allow empty symbol literals!
94
- hash.delete(nil)
95
- hash.delete('')
96
- eval('hash.delete(:"")') if RUBY_VERSION.to_f > 1.8
97
-
98
- if options[:remove_empty_values] == true
99
- hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)}
100
- end
101
-
102
- hash.delete_if{|_k, v| !v.nil? && v =~ /^(0+|0+\.0+)$/} if options[:remove_zero_values] # values are Strings
103
- hash.delete_if{|_k, v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
104
-
105
- if options[:convert_values_to_numeric]
106
- hash.each do |k, v|
107
- # deal with the :only / :except options to :convert_values_to_numeric
108
- next if only_or_except_limit_execution(options, :convert_values_to_numeric, k)
109
-
110
- # convert if it's a numeric value:
111
- case v
112
- when /^[+-]?\d+\.\d+$/
113
- hash[k] = v.to_f
114
- when /^[+-]?\d+$/
115
- hash[k] = v.to_i
116
- end
117
- end
118
- end
119
-
120
- if options[:value_converters]
121
- hash.each do |k, v|
122
- converter = options[:value_converters][k]
123
- next unless converter
124
-
125
- hash[k] = converter.convert(v)
126
- end
127
- end
128
-
129
- next if options[:remove_empty_hashes] && hash.empty?
130
-
131
- hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]
132
-
133
- if use_chunks
134
- chunk << hash # append temp result to chunk
135
-
136
- if chunk.size >= chunk_size || fh.eof? # if chunk if full, or EOF reached
137
- # do something with the chunk
138
- if block_given?
139
- yield chunk # do something with the hashes in the chunk in the block
140
- else
141
- result << chunk # not sure yet, why anybody would want to do this without a block
142
- end
143
- chunk_count += 1
144
- chunk = [] # initialize for next chunk of data
145
- else
146
-
147
- # the last chunk may contain partial data, which also needs to be returned (BUG / ISSUE-18)
148
-
149
- end
150
-
151
- # while a chunk is being filled up we don't need to do anything else here
152
-
153
- else # no chunk handling
154
- if block_given?
155
- yield [hash] # do something with the hash in the block (better to use chunking here)
156
- else
157
- result << hash
158
- end
159
- end
160
- end
161
-
162
- # print new line to retain last processing line message
163
- print "\n" if options[:verbose]
164
-
165
- # last chunk:
166
- if !chunk.nil? && chunk.size > 0
167
- # do something with the chunk
168
- if block_given?
169
- yield chunk # do something with the hashes in the chunk in the block
170
- else
171
- result << chunk # not sure yet, why anybody would want to do this without a block
172
- end
173
- chunk_count += 1
174
- # chunk = [] # initialize for next chunk of data
175
- end
176
- ensure
177
- fh.close if fh.respond_to?(:close)
178
- end
179
- if block_given?
180
- chunk_count # when we do processing through a block we only care how many chunks we processed
8
+ case RUBY_ENGINE
9
+ when 'ruby'
10
+ begin
11
+ if `uname -s`.chomp == 'Darwin'
12
+ require 'smarter_csv/smarter_csv.bundle'
181
13
  else
182
- result # returns either an Array of Hashes, or an Array of Arrays of Hashes (if in chunked mode)
14
+ require_relative "smarter_csv/smarter_csv"
183
15
  end
16
+ rescue Exception
17
+ # require_relative 'smarter_csv/smarter_csv'
184
18
  end
19
+ # :nocov:
20
+ # when 'truffleruby'
21
+ # puts "\n\n truffleruby case in the load path | RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}\n\n"
22
+ # # this might not work - if you encounter problems, please contribute and create a PR
23
+ # # require 'truffleruby/smarter_csv'
24
+ else
25
+ puts <<-BLOCK_COMMENT
185
26
 
186
- class << self
187
- def has_acceleration?
188
- @has_acceleration ||= !!defined?(parse_csv_line_c)
189
- end
190
-
191
- def raw_header
192
- @raw_header
193
- end
194
-
195
- def headers
196
- @headers
197
- end
198
-
199
- # Counts the number of quote characters in a line, excluding escaped quotes.
200
- def count_quote_chars(line, quote_char)
201
- return 0 if line.nil? || quote_char.nil?
202
-
203
- count = 0
204
- previous_char = ''
205
-
206
- line.each_char do |char|
207
- count += 1 if char == quote_char && previous_char != '\\'
208
- previous_char = char
209
- end
210
-
211
- count
212
- end
213
-
214
- protected
215
-
216
- # NOTE: this is not called when "parse" methods are tested by themselves
217
- def default_options
218
- {
219
- acceleration: true,
220
- auto_row_sep_chars: 500,
221
- chunk_size: nil,
222
- col_sep: :auto, # was: ',',
223
- comment_regexp: nil, # was: /\A#/,
224
- convert_values_to_numeric: true,
225
- downcase_header: true,
226
- duplicate_header_suffix: nil,
227
- file_encoding: 'utf-8',
228
- force_simple_split: false,
229
- force_utf8: false,
230
- headers_in_file: true,
231
- invalid_byte_sequence: '',
232
- keep_original_headers: false,
233
- key_mapping: nil,
234
- quote_char: '"',
235
- remove_empty_hashes: true,
236
- remove_empty_values: true,
237
- remove_unmapped_keys: false,
238
- remove_values_matching: nil,
239
- remove_zero_values: false,
240
- required_headers: nil,
241
- required_keys: nil,
242
- row_sep: :auto, # was: $/,
243
- silence_missing_keys: false,
244
- skip_lines: nil,
245
- strings_as_keys: false,
246
- strip_chars_from_headers: nil,
247
- strip_whitespace: true,
248
- user_provided_headers: nil,
249
- value_converters: nil,
250
- verbose: false,
251
- with_line_numbers: false,
252
- }
253
- end
254
-
255
- def readline_with_counts(filehandle, options)
256
- line = filehandle.readline(options[:row_sep])
257
- @file_line_count += 1
258
- @csv_line_count += 1
259
- line = remove_bom(line) if @csv_line_count == 1
260
- line
261
- end
262
-
263
- def skip_lines(filehandle, options)
264
- return unless options[:skip_lines].to_i > 0
265
-
266
- options[:skip_lines].to_i.times do
267
- readline_with_counts(filehandle, options)
268
- end
269
- end
270
-
271
- def rewind(filehandle)
272
- @file_line_count = 0
273
- @csv_line_count = 0
274
- filehandle.rewind
275
- end
276
-
277
- ###
278
- ### Thin wrapper around C-extension
279
- ###
280
- def parse(line, options, header_size = nil)
281
- # puts "SmarterCSV.parse OPTIONS: #{options[:acceleration]}" if options[:verbose]
282
-
283
- if options[:acceleration] && has_acceleration?
284
- # :nocov:
285
- has_quotes = line =~ /#{options[:quote_char]}/
286
- elements = parse_csv_line_c(line, options[:col_sep], options[:quote_char], header_size)
287
- elements.map!{|x| cleanup_quotes(x, options[:quote_char])} if has_quotes
288
- [elements, elements.size]
289
- # :nocov:
290
- else
291
- # puts "WARNING: SmarterCSV is using un-accelerated parsing of lines. Check options[:acceleration]"
292
- parse_csv_line_ruby(line, options, header_size)
293
- end
294
- end
295
-
296
- # ------------------------------------------------------------------
297
- # Ruby equivalent of the C-extension for parse_line
298
- #
299
- # parses a single line: either a CSV header and body line
300
- # - quoting rules compared to RFC-4180 are somewhat relaxed
301
- # - we are not assuming that quotes inside a fields need to be doubled
302
- # - we are not assuming that all fields need to be quoted (0 is even)
303
- # - works with multi-char col_sep
304
- # - if header_size is given, only up to header_size fields are parsed
305
- #
306
- # We use header_size for parsing the body lines to make sure we always match the number of headers
307
- # in case there are trailing col_sep characters in line
308
- #
309
- # Our convention is that empty fields are returned as empty strings, not as nil.
310
- #
311
- #
312
- # the purpose of the max_size parameter is to handle a corner case where
313
- # CSV lines contain more fields than the header.
314
- # In which case the remaining fields in the line are ignored
315
- #
316
- def parse_csv_line_ruby(line, options, header_size = nil)
317
- return [] if line.nil?
318
-
319
- line_size = line.size
320
- col_sep = options[:col_sep]
321
- col_sep_size = col_sep.size
322
- quote = options[:quote_char]
323
- quote_count = 0
324
- elements = []
325
- start = 0
326
- i = 0
27
+ -------------------------------------------------------------------------
28
+ RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}
327
29
 
328
- previous_char = ''
329
- while i < line_size
330
- if line[i...i+col_sep_size] == col_sep && quote_count.even?
331
- break if !header_size.nil? && elements.size >= header_size
30
+ Acceleration via C-Extension is currently not supported for #{RUBY_ENGINE}
332
31
 
333
- elements << cleanup_quotes(line[start...i], quote)
334
- previous_char = line[i]
335
- i += col_sep.size
336
- start = i
337
- else
338
- quote_count += 1 if line[i] == quote && previous_char != '\\'
339
- previous_char = line[i]
340
- i += 1
341
- end
342
- end
343
- elements << cleanup_quotes(line[start..-1], quote) if header_size.nil? || elements.size < header_size
344
- [elements, elements.size]
345
- end
346
-
347
- def cleanup_quotes(field, quote)
348
- return field if field.nil?
349
-
350
- # return if field !~ /#{quote}/ # this check can probably eliminated
351
-
352
- if field.start_with?(quote) && field.end_with?(quote)
353
- field.delete_prefix!(quote)
354
- field.delete_suffix!(quote)
355
- end
356
- field.gsub!("#{quote}#{quote}", quote)
357
- field
358
- end
359
-
360
- # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
361
- # and in the future we might also include UTF-8 space characters: https://www.compart.com/en/unicode/category/Zs
362
- BLANK_RE = /\A\s*\z/.freeze
363
-
364
- def blank?(value)
365
- case value
366
- when String
367
- value.empty? || BLANK_RE.match?(value)
368
-
369
- when NilClass
370
- true
371
-
372
- when Array
373
- value.empty? || value.inject(true){|result, x| result &&= elem_blank?(x)}
374
-
375
- when Hash
376
- value.empty? || value.values.inject(true){|result, x| result &&= elem_blank?(x)}
377
-
378
- else
379
- false
380
- end
381
- end
382
-
383
- def elem_blank?(value)
384
- case value
385
- when String
386
- value.empty? || BLANK_RE.match?(value)
387
-
388
- when NilClass
389
- true
390
-
391
- else
392
- false
393
- end
394
- end
395
-
396
- # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
397
- def only_or_except_limit_execution(options, option_name, key)
398
- if options[option_name].is_a?(Hash)
399
- if options[option_name].has_key?(:except)
400
- return true if Array(options[option_name][:except]).include?(key)
401
- elsif options[option_name].has_key?(:only)
402
- return true unless Array(options[option_name][:only]).include?(key)
403
- end
404
- end
405
- false
406
- end
407
-
408
- # If file has headers, then guesses column separator from headers.
409
- # Otherwise guesses column separator from contents.
410
- # Raises exception if none is found.
411
- def guess_column_separator(filehandle, options)
412
- skip_lines(filehandle, options)
413
-
414
- delimiters = [',', "\t", ';', ':', '|']
415
-
416
- line = nil
417
- has_header = options[:headers_in_file]
418
- candidates = Hash.new(0)
419
- count = has_header ? 1 : 5
420
- count.times do
421
- line = readline_with_counts(filehandle, options)
422
- delimiters.each do |d|
423
- candidates[d] += line.scan(d).count
424
- end
425
- rescue EOFError # short files
426
- break
427
- end
428
- rewind(filehandle)
429
-
430
- if candidates.values.max == 0
431
- # if the header only contains
432
- return ',' if line.chomp(options[:row_sep]) =~ /^\w+$/
433
-
434
- raise SmarterCSV::NoColSepDetected
435
- end
436
-
437
- candidates.key(candidates.values.max)
438
- end
439
-
440
- # limitation: this currently reads the whole file in before making a decision
441
- def guess_line_ending(filehandle, options)
442
- counts = {"\n" => 0, "\r" => 0, "\r\n" => 0}
443
- quoted_char = false
444
-
445
- # count how many of the pre-defined line-endings we find
446
- # ignoring those contained within quote characters
447
- last_char = nil
448
- lines = 0
449
- filehandle.each_char do |c|
450
- quoted_char = !quoted_char if c == options[:quote_char]
451
- next if quoted_char
452
-
453
- if last_char == "\r"
454
- if c == "\n"
455
- counts["\r\n"] += 1
456
- else
457
- counts["\r"] += 1 # \r are counted after they appeared
458
- end
459
- elsif c == "\n"
460
- counts["\n"] += 1
461
- end
462
- last_char = c
463
- lines += 1
464
- break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
465
- end
466
- rewind(filehandle)
467
-
468
- counts["\r"] += 1 if last_char == "\r"
469
- # find the most frequent key/value pair:
470
- most_frequent_key, _count = counts.max_by{|_, v| v}
471
- most_frequent_key
472
- end
473
-
474
- def process_headers(filehandle, options)
475
- @raw_header = nil
476
- @headers = nil
477
- if options[:headers_in_file] # extract the header line
478
- # process the header line in the CSV file..
479
- # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
480
- header = readline_with_counts(filehandle, options)
481
- @raw_header = header
32
+ Please contribute and create a pull request if you need this
33
+ -------------------------------------------------------------------------
482
34
 
483
- header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
484
- header = header.sub(options[:comment_regexp], '') if options[:comment_regexp]
485
- header = header.chomp(options[:row_sep])
486
-
487
- header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
488
-
489
- file_headerA, file_header_size = parse(header, options)
490
-
491
- file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
492
- file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
493
-
494
- unless options[:keep_original_headers]
495
- file_headerA.map!{|x| x.gsub(/\s+|-+/, '_')}
496
- file_headerA.map!{|x| x.downcase} if options[:downcase_header]
497
- end
498
- else
499
- raise SmarterCSV::IncorrectOption, "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
500
- end
501
- if options[:user_provided_headers] && options[:user_provided_headers].class == Array && !options[:user_provided_headers].empty?
502
- # use user-provided headers
503
- headerA = options[:user_provided_headers]
504
- if defined?(file_header_size) && !file_header_size.nil?
505
- if headerA.size != file_header_size
506
- raise SmarterCSV::HeaderSizeMismatch, "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file has #{file_header_size} headers"
507
- else
508
- # we could print out the mapping of file_headerA to headerA here
509
- end
510
- end
511
- else
512
- headerA = file_headerA
513
- end
514
-
515
- # detect duplicate headers and disambiguate
516
- headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
517
- header_size = headerA.size # used for splitting lines
518
-
519
- headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
520
-
521
- unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
522
- key_mappingH = options[:key_mapping]
523
-
524
- # do some key mapping on the keys in the file header
525
- # if you want to completely delete a key, then map it to nil or to ''
526
- if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
527
- # if silence_missing_keys are not set, raise error if missing header
528
- missing_keys = key_mappingH.keys - headerA
529
- # if the user passes a list of speciffic mapped keys that are optional
530
- missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array)
531
-
532
- unless missing_keys.empty? || options[:silence_missing_keys] == true
533
- raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}"
534
- end
535
-
536
- headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
537
- end
538
- end
539
-
540
- # header_validations
541
- duplicate_headers = []
542
- headerA.compact.each do |k|
543
- duplicate_headers << k if headerA.select{|x| x == k}.size > 1
544
- end
545
-
546
- unless options[:user_provided_headers] || duplicate_headers.empty?
547
- raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
548
- end
549
-
550
- # deprecate required_headers
551
- unless options[:required_headers].nil?
552
- puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'"
553
- if options[:required_keys].nil?
554
- options[:required_keys] = options[:required_headers]
555
- options[:required_headers] = nil
556
- end
557
- end
558
-
559
- if options[:required_keys] && options[:required_keys].is_a?(Array)
560
- missing_keys = []
561
- options[:required_keys].each do |k|
562
- missing_keys << k unless headerA.include?(k)
563
- end
564
- raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
565
- end
566
-
567
- @headers = headerA
568
- [headerA, header_size]
569
- end
570
-
571
- def process_duplicate_headers(headers, options)
572
- counts = Hash.new(0)
573
- result = []
574
- headers.each do |key|
575
- counts[key] += 1
576
- if counts[key] == 1
577
- result << key
578
- else
579
- result << [key, options[:duplicate_header_suffix], counts[key]].join
580
- end
581
- end
582
- result
583
- end
584
-
585
- private
586
-
587
- UTF_32_BOM = %w[0 0 fe ff].freeze
588
- UTF_32LE_BOM = %w[ff fe 0 0].freeze
589
- UTF_8_BOM = %w[ef bb bf].freeze
590
- UTF_16_BOM = %w[fe ff].freeze
591
- UTF_16LE_BOM = %w[ff fe].freeze
592
-
593
- def remove_bom(str)
594
- str_as_hex = str.bytes.map{|x| x.to_s(16)}
595
- # if string does not start with one of the bytes, there is no BOM
596
- return str unless %w[ef fe ff 0].include?(str_as_hex[0])
597
-
598
- return str.byteslice(4..-1) if [UTF_32_BOM, UTF_32LE_BOM].include?(str_as_hex[0..3])
599
- return str.byteslice(3..-1) if str_as_hex[0..2] == UTF_8_BOM
600
- return str.byteslice(2..-1) if [UTF_16_BOM, UTF_16LE_BOM].include?(str_as_hex[0..1])
601
-
602
- puts "SmarterCSV found unhandled BOM! #{str.chars[0..7].inspect}"
603
- str
604
- end
605
-
606
- def validate_options!(options)
607
- keys = options.keys
608
- errors = []
609
- errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
610
- errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
611
- errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
612
- raise SmarterCSV::ValidationError, errors.inspect if errors.any?
613
- end
614
-
615
- def option_valid?(str)
616
- return true if str.is_a?(Symbol) && str == :auto
617
- return true if str.is_a?(String) && !str.empty?
618
-
619
- false
620
- end
621
- end
35
+ BLOCK_COMMENT
622
36
  end
37
+ # :nocov:
38
+ require "smarter_csv/smarter_csv"
data/smarter_csv.gemspec CHANGED
@@ -31,8 +31,9 @@ Gem::Specification.new do |spec|
31
31
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
32
32
 
33
33
  spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
34
- spec.require_paths = ["lib"] # add ext here?
34
+ spec.require_paths = %w[lib ext]
35
35
  spec.extensions = ["ext/smarter_csv/extconf.rb"]
36
+ spec.files += Dir.glob("ext/smarter_csv/**/*")
36
37
 
37
38
  spec.add_development_dependency "awesome_print"
38
39
  spec.add_development_dependency "codecov"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 1.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-05 00:00:00.000000000 Z
11
+ date: 2023-11-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: awesome_print
@@ -115,8 +115,10 @@ files:
115
115
  - TO_DO_v2.md
116
116
  - ext/smarter_csv/extconf.rb
117
117
  - ext/smarter_csv/smarter_csv.c
118
- - lib/extensions/hash.rb
118
+ - lib/core_ext/hash.rb
119
119
  - lib/smarter_csv.rb
120
+ - lib/smarter_csv/options_processing.rb
121
+ - lib/smarter_csv/smarter_csv.rb
120
122
  - lib/smarter_csv/version.rb
121
123
  - smarter_csv.gemspec
122
124
  homepage: https://github.com/tilo/smarter_csv
@@ -130,6 +132,7 @@ post_install_message:
130
132
  rdoc_options: []
131
133
  require_paths:
132
134
  - lib
135
+ - ext
133
136
  required_ruby_version: !ruby/object:Gem::Requirement
134
137
  requirements:
135
138
  - - ">="
File without changes