smarter_csv 1.8.5 → 1.9.2.pre01

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/smarter_csv.rb CHANGED
@@ -1,617 +1,39 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "extensions/hash"
4
- require_relative "smarter_csv/version"
3
+ require "core_ext/hash"
5
4
 
6
- require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
7
- # require 'smarter_csv.bundle' unless ENV['CI'] # local testing
5
+ require "smarter_csv/version"
6
+ require "smarter_csv/options_processing"
8
7
 
9
- module SmarterCSV
10
- class SmarterCSVException < StandardError; end
11
- class HeaderSizeMismatch < SmarterCSVException; end
12
- class IncorrectOption < SmarterCSVException; end
13
- class ValidationError < SmarterCSVException; end
14
- class DuplicateHeaders < SmarterCSVException; end
15
- class MissingHeaders < SmarterCSVException; end
16
- class NoColSepDetected < SmarterCSVException; end
17
- class KeyMappingError < SmarterCSVException; end # CURRENTLY UNUSED -> version 1.9.0
18
-
19
- # first parameter: filename or input object which responds to readline method
20
- def SmarterCSV.process(input, options = {}, &block)
21
- options = default_options.merge(options)
22
- options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
23
- puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
24
- validate_options!(options)
25
-
26
- headerA = []
27
- result = []
28
- @file_line_count = 0
29
- @csv_line_count = 0
30
- has_rails = !!defined?(Rails)
31
- begin
32
- fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
33
-
34
- # auto-detect the row separator
35
- options[:row_sep] = guess_line_ending(fh, options) if options[:row_sep]&.to_sym == :auto
36
- # attempt to auto-detect column separator
37
- options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep]&.to_sym == :auto
38
-
39
- if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8'))
40
- puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
41
- end
42
-
43
- skip_lines(fh, options)
44
-
45
- headerA, header_size = process_headers(fh, options)
46
-
47
- # in case we use chunking.. we'll need to set it up..
48
- if !options[:chunk_size].nil? && options[:chunk_size].to_i > 0
49
- use_chunks = true
50
- chunk_size = options[:chunk_size].to_i
51
- chunk_count = 0
52
- chunk = []
53
- else
54
- use_chunks = false
55
- end
56
-
57
- # now on to processing all the rest of the lines in the CSV file:
58
- until fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
59
- line = readline_with_counts(fh, options)
60
-
61
- # replace invalid byte sequence in UTF-8 with question mark to avoid errors
62
- line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
63
-
64
- print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
65
-
66
- next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
67
-
68
- # cater for the quoted csv data containing the row separator carriage return character
69
- # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
70
- # by detecting the existence of an uneven number of quote characters
71
-
72
- multiline = count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil
73
- while count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil
74
- next_line = fh.readline(options[:row_sep])
75
- next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
76
- line += next_line
77
- @file_line_count += 1
78
- end
79
- print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
80
-
81
- line.chomp!(options[:row_sep])
82
-
83
- dataA, _data_size = parse(line, options, header_size)
84
-
85
- dataA.map!{|x| x.strip} if options[:strip_whitespace]
86
-
87
- # if all values are blank, then ignore this line
88
- next if options[:remove_empty_hashes] && (dataA.empty? || blank?(dataA))
89
-
90
- hash = Hash.zip(headerA, dataA) # from Facets of Ruby library
91
-
92
- # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
93
- # Note: Ruby < 1.9 doesn't allow empty symbol literals!
94
- hash.delete(nil)
95
- hash.delete('')
96
- eval('hash.delete(:"")') if RUBY_VERSION.to_f > 1.8
97
-
98
- if options[:remove_empty_values] == true
99
- hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)}
100
- end
101
-
102
- hash.delete_if{|_k, v| !v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
103
- hash.delete_if{|_k, v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
104
-
105
- if options[:convert_values_to_numeric]
106
- hash.each do |k, v|
107
- # deal with the :only / :except options to :convert_values_to_numeric
108
- next if only_or_except_limit_execution(options, :convert_values_to_numeric, k)
109
-
110
- # convert if it's a numeric value:
111
- case v
112
- when /^[+-]?\d+\.\d+$/
113
- hash[k] = v.to_f
114
- when /^[+-]?\d+$/
115
- hash[k] = v.to_i
116
- end
117
- end
118
- end
119
-
120
- if options[:value_converters]
121
- hash.each do |k, v|
122
- converter = options[:value_converters][k]
123
- next unless converter
124
-
125
- hash[k] = converter.convert(v)
126
- end
127
- end
128
-
129
- next if options[:remove_empty_hashes] && hash.empty?
130
-
131
- hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]
132
-
133
- if use_chunks
134
- chunk << hash # append temp result to chunk
135
-
136
- if chunk.size >= chunk_size || fh.eof? # if chunk if full, or EOF reached
137
- # do something with the chunk
138
- if block_given?
139
- yield chunk # do something with the hashes in the chunk in the block
140
- else
141
- result << chunk # not sure yet, why anybody would want to do this without a block
142
- end
143
- chunk_count += 1
144
- chunk = [] # initialize for next chunk of data
145
- else
146
-
147
- # the last chunk may contain partial data, which also needs to be returned (BUG / ISSUE-18)
148
-
149
- end
150
-
151
- # while a chunk is being filled up we don't need to do anything else here
152
-
153
- else # no chunk handling
154
- if block_given?
155
- yield [hash] # do something with the hash in the block (better to use chunking here)
156
- else
157
- result << hash
158
- end
159
- end
160
- end
161
-
162
- # print new line to retain last processing line message
163
- print "\n" if options[:verbose]
164
-
165
- # last chunk:
166
- if !chunk.nil? && chunk.size > 0
167
- # do something with the chunk
168
- if block_given?
169
- yield chunk # do something with the hashes in the chunk in the block
170
- else
171
- result << chunk # not sure yet, why anybody would want to do this without a block
172
- end
173
- chunk_count += 1
174
- chunk = [] # initialize for next chunk of data
175
- end
176
- ensure
177
- fh.close if fh.respond_to?(:close)
178
- end
179
- if block_given?
180
- return chunk_count # when we do processing through a block we only care how many chunks we processed
8
+ case RUBY_ENGINE
9
+ when 'ruby'
10
+ begin
11
+ if `uname -s`.chomp == 'Darwin'
12
+ require 'smarter_csv/smarter_csv.bundle'
181
13
  else
182
- return result # returns either an Array of Hashes, or an Array of Arrays of Hashes (if in chunked mode)
14
+ require_relative "smarter_csv/smarter_csv"
183
15
  end
16
+ rescue Exception
17
+ # require_relative 'smarter_csv/smarter_csv'
184
18
  end
19
+ # :nocov:
20
+ # when 'truffleruby'
21
+ # puts "\n\n truffleruby case in the load path | RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}\n\n"
22
+ # # this might not work - if you encounter problems, please contribute and create a PR
23
+ # # require 'truffleruby/smarter_csv'
24
+ else
25
+ puts <<-BLOCK_COMMENT
185
26
 
186
- class << self
187
- def has_acceleration?
188
- @has_acceleration ||= !!defined?(parse_csv_line_c)
189
- end
190
-
191
- def raw_header
192
- @raw_header
193
- end
194
-
195
- def headers
196
- @headers
197
- end
198
-
199
- # Counts the number of quote characters in a line, excluding escaped quotes.
200
- def count_quote_chars(line, quote_char)
201
- return 0 if line.nil? || quote_char.nil?
202
-
203
- count = 0
204
- previous_char = ''
205
-
206
- line.each_char do |char|
207
- count += 1 if char == quote_char && previous_char != '\\'
208
- previous_char = char
209
- end
210
-
211
- count
212
- end
213
-
214
- protected
215
-
216
- # NOTE: this is not called when "parse" methods are tested by themselves
217
- def default_options
218
- {
219
- acceleration: true,
220
- auto_row_sep_chars: 500,
221
- chunk_size: nil,
222
- col_sep: :auto, # was: ',',
223
- comment_regexp: nil, # was: /\A#/,
224
- convert_values_to_numeric: true,
225
- downcase_header: true,
226
- duplicate_header_suffix: nil,
227
- file_encoding: 'utf-8',
228
- force_simple_split: false,
229
- force_utf8: false,
230
- headers_in_file: true,
231
- invalid_byte_sequence: '',
232
- keep_original_headers: false,
233
- key_mapping: nil,
234
- quote_char: '"',
235
- remove_empty_hashes: true,
236
- remove_empty_values: true,
237
- remove_unmapped_keys: false,
238
- remove_values_matching: nil,
239
- remove_zero_values: false,
240
- required_headers: nil,
241
- required_keys: nil,
242
- row_sep: :auto, # was: $/,
243
- silence_missing_keys: false,
244
- skip_lines: nil,
245
- strings_as_keys: false,
246
- strip_chars_from_headers: nil,
247
- strip_whitespace: true,
248
- user_provided_headers: nil,
249
- value_converters: nil,
250
- verbose: false,
251
- with_line_numbers: false,
252
- }
253
- end
254
-
255
- def readline_with_counts(filehandle, options)
256
- line = filehandle.readline(options[:row_sep])
257
- @file_line_count += 1
258
- @csv_line_count += 1
259
- line = remove_bom(line) if @csv_line_count == 1
260
- line
261
- end
262
-
263
- def skip_lines(filehandle, options)
264
- return unless options[:skip_lines].to_i > 0
265
-
266
- options[:skip_lines].to_i.times do
267
- readline_with_counts(filehandle, options)
268
- end
269
- end
270
-
271
- def rewind(filehandle)
272
- @file_line_count = 0
273
- @csv_line_count = 0
274
- filehandle.rewind
275
- end
276
-
277
- ###
278
- ### Thin wrapper around C-extension
279
- ###
280
- def parse(line, options, header_size = nil)
281
- # puts "SmarterCSV.parse OPTIONS: #{options[:acceleration]}" if options[:verbose]
282
-
283
- if options[:acceleration] && has_acceleration?
284
- # :nocov:
285
- has_quotes = line =~ /#{options[:quote_char]}/
286
- elements = parse_csv_line_c(line, options[:col_sep], options[:quote_char], header_size)
287
- elements.map!{|x| cleanup_quotes(x, options[:quote_char])} if has_quotes
288
- return [elements, elements.size]
289
- # :nocov:
290
- else
291
- # puts "WARNING: SmarterCSV is using un-accelerated parsing of lines. Check options[:acceleration]"
292
- return parse_csv_line_ruby(line, options, header_size)
293
- end
294
- end
295
-
296
- # ------------------------------------------------------------------
297
- # Ruby equivalent of the C-extension for parse_line
298
- #
299
- # parses a single line: either a CSV header and body line
300
- # - quoting rules compared to RFC-4180 are somewhat relaxed
301
- # - we are not assuming that quotes inside a fields need to be doubled
302
- # - we are not assuming that all fields need to be quoted (0 is even)
303
- # - works with multi-char col_sep
304
- # - if header_size is given, only up to header_size fields are parsed
305
- #
306
- # We use header_size for parsing the body lines to make sure we always match the number of headers
307
- # in case there are trailing col_sep characters in line
308
- #
309
- # Our convention is that empty fields are returned as empty strings, not as nil.
310
- #
311
- #
312
- # the purpose of the max_size parameter is to handle a corner case where
313
- # CSV lines contain more fields than the header.
314
- # In which case the remaining fields in the line are ignored
315
- #
316
- def parse_csv_line_ruby(line, options, header_size = nil)
317
- return [] if line.nil?
27
+ -------------------------------------------------------------------------
28
+ RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}
318
29
 
319
- line_size = line.size
320
- col_sep = options[:col_sep]
321
- col_sep_size = col_sep.size
322
- quote = options[:quote_char]
323
- quote_count = 0
324
- elements = []
325
- start = 0
326
- i = 0
30
+ Acceleration via C-Extension is currently not supported for #{RUBY_ENGINE}
327
31
 
328
- previous_char = ''
329
- while i < line_size
330
- if line[i...i+col_sep_size] == col_sep && quote_count.even?
331
- break if !header_size.nil? && elements.size >= header_size
32
+ Please contribute and create a pull request if you need this
33
+ -------------------------------------------------------------------------
332
34
 
333
- elements << cleanup_quotes(line[start...i], quote)
334
- previous_char = line[i]
335
- i += col_sep.size
336
- start = i
337
- else
338
- quote_count += 1 if line[i] == quote && previous_char != '\\'
339
- previous_char = line[i]
340
- i += 1
341
- end
342
- end
343
- elements << cleanup_quotes(line[start..-1], quote) if header_size.nil? || elements.size < header_size
344
- [elements, elements.size]
345
- end
346
-
347
- def cleanup_quotes(field, quote)
348
- return field if field.nil?
349
-
350
- # return if field !~ /#{quote}/ # this check can probably eliminated
351
-
352
- if field.start_with?(quote) && field.end_with?(quote)
353
- field.delete_prefix!(quote)
354
- field.delete_suffix!(quote)
355
- end
356
- field.gsub!("#{quote}#{quote}", quote)
357
- field
358
- end
359
-
360
- # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
361
- # and in the future we might also include UTF-8 space characters: https://www.compart.com/en/unicode/category/Zs
362
- BLANK_RE = /\A\s*\z/.freeze
363
-
364
- def blank?(value)
365
- case value
366
- when String
367
- value.empty? || BLANK_RE.match?(value)
368
-
369
- when NilClass
370
- true
371
-
372
- when Array
373
- value.empty? || value.inject(true){|result, x| result &&= elem_blank?(x)}
374
-
375
- when Hash
376
- value.empty? || value.values.inject(true){|result, x| result &&= elem_blank?(x)}
377
-
378
- else
379
- false
380
- end
381
- end
382
-
383
- def elem_blank?(value)
384
- case value
385
- when String
386
- value.empty? || BLANK_RE.match?(value)
387
-
388
- when NilClass
389
- true
390
-
391
- else
392
- false
393
- end
394
- end
395
-
396
- # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
397
- def only_or_except_limit_execution(options, option_name, key)
398
- if options[option_name].is_a?(Hash)
399
- if options[option_name].has_key?(:except)
400
- return true if Array(options[option_name][:except]).include?(key)
401
- elsif options[option_name].has_key?(:only)
402
- return true unless Array(options[option_name][:only]).include?(key)
403
- end
404
- end
405
- return false
406
- end
407
-
408
- # If file has headers, then guesses column separator from headers.
409
- # Otherwise guesses column separator from contents.
410
- # Raises exception if none is found.
411
- def guess_column_separator(filehandle, options)
412
- skip_lines(filehandle, options)
413
-
414
- delimiters = [',', "\t", ';', ':', '|']
415
-
416
- line = nil
417
- has_header = options[:headers_in_file]
418
- candidates = Hash.new(0)
419
- count = has_header ? 1 : 5
420
- count.times do
421
- line = readline_with_counts(filehandle, options)
422
- delimiters.each do |d|
423
- candidates[d] += line.scan(d).count
424
- end
425
- rescue EOFError # short files
426
- break
427
- end
428
- rewind(filehandle)
429
-
430
- if candidates.values.max == 0
431
- # if the header only contains
432
- return ',' if line.chomp(options[:row_sep]) =~ /^\w+$/
433
-
434
- raise SmarterCSV::NoColSepDetected
435
- end
436
-
437
- candidates.key(candidates.values.max)
438
- end
439
-
440
- # limitation: this currently reads the whole file in before making a decision
441
- def guess_line_ending(filehandle, options)
442
- counts = {"\n" => 0, "\r" => 0, "\r\n" => 0}
443
- quoted_char = false
444
-
445
- # count how many of the pre-defined line-endings we find
446
- # ignoring those contained within quote characters
447
- last_char = nil
448
- lines = 0
449
- filehandle.each_char do |c|
450
- quoted_char = !quoted_char if c == options[:quote_char]
451
- next if quoted_char
452
-
453
- if last_char == "\r"
454
- if c == "\n"
455
- counts["\r\n"] += 1
456
- else
457
- counts["\r"] += 1 # \r are counted after they appeared
458
- end
459
- elsif c == "\n"
460
- counts["\n"] += 1
461
- end
462
- last_char = c
463
- lines += 1
464
- break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
465
- end
466
- rewind(filehandle)
467
-
468
- counts["\r"] += 1 if last_char == "\r"
469
- # find the most frequent key/value pair:
470
- k, _ = counts.max_by{|_, v| v}
471
- return k
472
- end
473
-
474
- def process_headers(filehandle, options)
475
- @raw_header = nil
476
- @headers = nil
477
- if options[:headers_in_file] # extract the header line
478
- # process the header line in the CSV file..
479
- # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
480
- header = readline_with_counts(filehandle, options)
481
- @raw_header = header
482
-
483
- header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
484
- header = header.sub(options[:comment_regexp], '') if options[:comment_regexp]
485
- header = header.chomp(options[:row_sep])
486
-
487
- header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
488
-
489
- file_headerA, file_header_size = parse(header, options)
490
-
491
- file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
492
- file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
493
- unless options[:keep_original_headers]
494
- file_headerA.map!{|x| x.gsub(/\s+|-+/, '_')}
495
- file_headerA.map!{|x| x.downcase} if options[:downcase_header]
496
- end
497
- else
498
- raise SmarterCSV::IncorrectOption, "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
499
- end
500
- if options[:user_provided_headers] && options[:user_provided_headers].class == Array && !options[:user_provided_headers].empty?
501
- # use user-provided headers
502
- headerA = options[:user_provided_headers]
503
- if defined?(file_header_size) && !file_header_size.nil?
504
- if headerA.size != file_header_size
505
- raise SmarterCSV::HeaderSizeMismatch, "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file has #{file_header_size} headers"
506
- else
507
- # we could print out the mapping of file_headerA to headerA here
508
- end
509
- end
510
- else
511
- headerA = file_headerA
512
- end
513
-
514
- # detect duplicate headers and disambiguate
515
- headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
516
- header_size = headerA.size # used for splitting lines
517
-
518
- headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
519
-
520
- unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
521
- key_mappingH = options[:key_mapping]
522
-
523
- # do some key mapping on the keys in the file header
524
- # if you want to completely delete a key, then map it to nil or to ''
525
- if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
526
- unless options[:silence_missing_keys]
527
- # if silence_missing_keys are not set, raise error if missing header
528
- missing_keys = key_mappingH.keys - headerA
529
- puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
530
- end
531
-
532
- headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
533
- end
534
- end
535
-
536
- # header_validations
537
- duplicate_headers = []
538
- headerA.compact.each do |k|
539
- duplicate_headers << k if headerA.select{|x| x == k}.size > 1
540
- end
541
-
542
- unless options[:user_provided_headers] || duplicate_headers.empty?
543
- raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
544
- end
545
-
546
- # deprecate required_headers
547
- if !options[:required_headers].nil?
548
- puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required headers'"
549
- if options[:required_keys].nil?
550
- options[:required_keys] = options[:required_headers]
551
- options[:required_headers] = nil
552
- end
553
- end
554
-
555
- if options[:required_keys] && options[:required_keys].is_a?(Array)
556
- missing_keys = []
557
- options[:required_keys].each do |k|
558
- missing_keys << k unless headerA.include?(k)
559
- end
560
- raise SmarterCSV::MissingHeaders, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
561
- end
562
-
563
- @headers = headerA
564
- [headerA, header_size]
565
- end
566
-
567
- def process_duplicate_headers(headers, options)
568
- counts = Hash.new(0)
569
- result = []
570
- headers.each do |key|
571
- counts[key] += 1
572
- if counts[key] == 1
573
- result << key
574
- else
575
- result << [key, options[:duplicate_header_suffix], counts[key]].join
576
- end
577
- end
578
- result
579
- end
580
-
581
- private
582
-
583
- UTF_32_BOM = %w[0 0 fe ff].freeze
584
- UTF_32LE_BOM = %w[ff fe 0 0].freeze
585
- UTF_8_BOM = %w[ef bb bf].freeze
586
- UTF_16_BOM = %w[fe ff].freeze
587
- UTF_16LE_BOM = %w[ff fe].freeze
588
-
589
- def remove_bom(str)
590
- str_as_hex = str.bytes.map{|x| x.to_s(16)}
591
- # if string does not start with one of the bytes, there is no BOM
592
- return str unless %w[ef fe ff 0].include?(str_as_hex[0])
593
-
594
- return str.byteslice(4..-1) if [UTF_32_BOM, UTF_32LE_BOM].include?(str_as_hex[0..3])
595
- return str.byteslice(3..-1) if str_as_hex[0..2] == UTF_8_BOM
596
- return str.byteslice(2..-1) if [UTF_16_BOM, UTF_16LE_BOM].include?(str_as_hex[0..1])
597
-
598
- puts "SmarterCSV found unhandled BOM! #{str.chars[0..7].inspect}"
599
- str
600
- end
601
-
602
- def validate_options!(options)
603
- keys = options.keys
604
- errors = []
605
- errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
606
- errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
607
- errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
608
- raise SmarterCSV::ValidationError, errors.inspect if errors.any?
609
- end
610
-
611
- def option_valid?(str)
612
- return true if str.is_a?(Symbol) && str == :auto
613
- return true if str.is_a?(String) && !str.empty?
614
- false
615
- end
616
- end
35
+ BLOCK_COMMENT
617
36
  end
37
+ # :nocov:
38
+ require "smarter_csv/smarter_csv"
39
+
data/smarter_csv.gemspec CHANGED
@@ -1,5 +1,7 @@
1
- # -*- encoding: utf-8 -*-
2
- require File.expand_path('../lib/smarter_csv/version', __FILE__)
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require File.expand_path('lib/smarter_csv/version', __dir__)
3
5
 
4
6
  Gem::Specification.new do |spec|
5
7
  spec.name = "smarter_csv"
@@ -7,8 +9,8 @@ Gem::Specification.new do |spec|
7
9
  spec.authors = ["Tilo Sloboda"]
8
10
  spec.email = ["tilo.sloboda@gmail.com"]
9
11
 
10
- spec.summary = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
11
- spec.description = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
12
+ spec.summary = "Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files"
13
+ spec.description = "Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys"
12
14
  spec.homepage = "https://github.com/tilo/smarter_csv"
13
15
  spec.license = 'MIT'
14
16
 
@@ -16,6 +18,8 @@ Gem::Specification.new do |spec|
16
18
  spec.metadata["source_code_uri"] = spec.homepage
17
19
  spec.metadata["changelog_uri"] = "https://github.com/tilo/smarter_csv/blob/main/CHANGELOG.md"
18
20
 
21
+ spec.required_ruby_version = ">= 2.5.0"
22
+
19
23
  # Specify which files should be added to the gem when it is released.
20
24
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
25
  spec.files = Dir.chdir(__dir__) do
@@ -27,9 +31,9 @@ Gem::Specification.new do |spec|
27
31
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
28
32
 
29
33
  spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
30
- spec.require_paths = ["lib"] # add ext here?
34
+ spec.require_paths = %w[lib ext]
31
35
  spec.extensions = ["ext/smarter_csv/extconf.rb"]
32
-
36
+ spec.files += Dir.glob("ext/smarter_csv/**/*")
33
37
 
34
38
  spec.add_development_dependency "awesome_print"
35
39
  spec.add_development_dependency "codecov"