smarter_csv 1.9.0 → 1.9.2.pre01

Sign up to get free protection for your applications and to get access to all the features.
data/lib/smarter_csv.rb CHANGED
@@ -1,622 +1,39 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "extensions/hash"
4
- require_relative "smarter_csv/version"
3
+ require "core_ext/hash"
5
4
 
6
- require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
7
- # require 'smarter_csv.bundle' unless ENV['CI'] # local testing
5
+ require "smarter_csv/version"
6
+ require "smarter_csv/options_processing"
8
7
 
9
- module SmarterCSV
10
- class SmarterCSVException < StandardError; end
11
- class HeaderSizeMismatch < SmarterCSVException; end
12
- class IncorrectOption < SmarterCSVException; end
13
- class ValidationError < SmarterCSVException; end
14
- class DuplicateHeaders < SmarterCSVException; end
15
- class MissingKeys < SmarterCSVException; end # previously known as MissingHeaders
16
- class NoColSepDetected < SmarterCSVException; end
17
- class KeyMappingError < SmarterCSVException; end
18
-
19
- # first parameter: filename or input object which responds to readline method
20
- def SmarterCSV.process(input, options = {}, &block) # rubocop:disable Lint/UnusedMethodArgument
21
- options = default_options.merge(options)
22
- options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
23
- puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
24
- validate_options!(options)
25
-
26
- headerA = []
27
- result = []
28
- @file_line_count = 0
29
- @csv_line_count = 0
30
- has_rails = !!defined?(Rails)
31
- begin
32
- fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
33
-
34
- # auto-detect the row separator
35
- options[:row_sep] = guess_line_ending(fh, options) if options[:row_sep]&.to_sym == :auto
36
- # attempt to auto-detect column separator
37
- options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep]&.to_sym == :auto
38
-
39
- if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8'))
40
- puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
41
- end
42
-
43
- skip_lines(fh, options)
44
-
45
- headerA, header_size = process_headers(fh, options)
46
-
47
- # in case we use chunking.. we'll need to set it up..
48
- if !options[:chunk_size].nil? && options[:chunk_size].to_i > 0
49
- use_chunks = true
50
- chunk_size = options[:chunk_size].to_i
51
- chunk_count = 0
52
- chunk = []
53
- else
54
- use_chunks = false
55
- end
56
-
57
- # now on to processing all the rest of the lines in the CSV file:
58
- until fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
59
- line = readline_with_counts(fh, options)
60
-
61
- # replace invalid byte sequence in UTF-8 with question mark to avoid errors
62
- line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
63
-
64
- print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
65
-
66
- next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
67
-
68
- # cater for the quoted csv data containing the row separator carriage return character
69
- # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
70
- # by detecting the existence of an uneven number of quote characters
71
-
72
- multiline = count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil
73
- while count_quote_chars(line, options[:quote_char]).odd? # should handle quote_char nil
74
- next_line = fh.readline(options[:row_sep])
75
- next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
76
- line += next_line
77
- @file_line_count += 1
78
- end
79
- print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
80
-
81
- line.chomp!(options[:row_sep])
82
-
83
- dataA, _data_size = parse(line, options, header_size)
84
-
85
- dataA.map!{|x| x.strip} if options[:strip_whitespace]
86
-
87
- # if all values are blank, then ignore this line
88
- next if options[:remove_empty_hashes] && (dataA.empty? || blank?(dataA))
89
-
90
- hash = Hash.zip(headerA, dataA) # from Facets of Ruby library
91
-
92
- # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
93
- # Note: Ruby < 1.9 doesn't allow empty symbol literals!
94
- hash.delete(nil)
95
- hash.delete('')
96
- eval('hash.delete(:"")') if RUBY_VERSION.to_f > 1.8
97
-
98
- if options[:remove_empty_values] == true
99
- hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)}
100
- end
101
-
102
- hash.delete_if{|_k, v| !v.nil? && v =~ /^(0+|0+\.0+)$/} if options[:remove_zero_values] # values are Strings
103
- hash.delete_if{|_k, v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
104
-
105
- if options[:convert_values_to_numeric]
106
- hash.each do |k, v|
107
- # deal with the :only / :except options to :convert_values_to_numeric
108
- next if only_or_except_limit_execution(options, :convert_values_to_numeric, k)
109
-
110
- # convert if it's a numeric value:
111
- case v
112
- when /^[+-]?\d+\.\d+$/
113
- hash[k] = v.to_f
114
- when /^[+-]?\d+$/
115
- hash[k] = v.to_i
116
- end
117
- end
118
- end
119
-
120
- if options[:value_converters]
121
- hash.each do |k, v|
122
- converter = options[:value_converters][k]
123
- next unless converter
124
-
125
- hash[k] = converter.convert(v)
126
- end
127
- end
128
-
129
- next if options[:remove_empty_hashes] && hash.empty?
130
-
131
- hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]
132
-
133
- if use_chunks
134
- chunk << hash # append temp result to chunk
135
-
136
- if chunk.size >= chunk_size || fh.eof? # if chunk if full, or EOF reached
137
- # do something with the chunk
138
- if block_given?
139
- yield chunk # do something with the hashes in the chunk in the block
140
- else
141
- result << chunk # not sure yet, why anybody would want to do this without a block
142
- end
143
- chunk_count += 1
144
- chunk = [] # initialize for next chunk of data
145
- else
146
-
147
- # the last chunk may contain partial data, which also needs to be returned (BUG / ISSUE-18)
148
-
149
- end
150
-
151
- # while a chunk is being filled up we don't need to do anything else here
152
-
153
- else # no chunk handling
154
- if block_given?
155
- yield [hash] # do something with the hash in the block (better to use chunking here)
156
- else
157
- result << hash
158
- end
159
- end
160
- end
161
-
162
- # print new line to retain last processing line message
163
- print "\n" if options[:verbose]
164
-
165
- # last chunk:
166
- if !chunk.nil? && chunk.size > 0
167
- # do something with the chunk
168
- if block_given?
169
- yield chunk # do something with the hashes in the chunk in the block
170
- else
171
- result << chunk # not sure yet, why anybody would want to do this without a block
172
- end
173
- chunk_count += 1
174
- # chunk = [] # initialize for next chunk of data
175
- end
176
- ensure
177
- fh.close if fh.respond_to?(:close)
178
- end
179
- if block_given?
180
- chunk_count # when we do processing through a block we only care how many chunks we processed
8
+ case RUBY_ENGINE
9
+ when 'ruby'
10
+ begin
11
+ if `uname -s`.chomp == 'Darwin'
12
+ require 'smarter_csv/smarter_csv.bundle'
181
13
  else
182
- result # returns either an Array of Hashes, or an Array of Arrays of Hashes (if in chunked mode)
14
+ require_relative "smarter_csv/smarter_csv"
183
15
  end
16
+ rescue Exception
17
+ # require_relative 'smarter_csv/smarter_csv'
184
18
  end
19
+ # :nocov:
20
+ # when 'truffleruby'
21
+ # puts "\n\n truffleruby case in the load path | RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}\n\n"
22
+ # # this might not work - if you encounter problems, please contribute and create a PR
23
+ # # require 'truffleruby/smarter_csv'
24
+ else
25
+ puts <<-BLOCK_COMMENT
185
26
 
186
- class << self
187
- def has_acceleration?
188
- @has_acceleration ||= !!defined?(parse_csv_line_c)
189
- end
190
-
191
- def raw_header
192
- @raw_header
193
- end
194
-
195
- def headers
196
- @headers
197
- end
198
-
199
- # Counts the number of quote characters in a line, excluding escaped quotes.
200
- def count_quote_chars(line, quote_char)
201
- return 0 if line.nil? || quote_char.nil?
202
-
203
- count = 0
204
- previous_char = ''
27
+ -------------------------------------------------------------------------
28
+ RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}
205
29
 
206
- line.each_char do |char|
207
- count += 1 if char == quote_char && previous_char != '\\'
208
- previous_char = char
209
- end
30
+ Acceleration via C-Extension is currently not supported for #{RUBY_ENGINE}
210
31
 
211
- count
212
- end
213
-
214
- protected
215
-
216
- # NOTE: this is not called when "parse" methods are tested by themselves
217
- def default_options
218
- {
219
- acceleration: true,
220
- auto_row_sep_chars: 500,
221
- chunk_size: nil,
222
- col_sep: :auto, # was: ',',
223
- comment_regexp: nil, # was: /\A#/,
224
- convert_values_to_numeric: true,
225
- downcase_header: true,
226
- duplicate_header_suffix: nil,
227
- file_encoding: 'utf-8',
228
- force_simple_split: false,
229
- force_utf8: false,
230
- headers_in_file: true,
231
- invalid_byte_sequence: '',
232
- keep_original_headers: false,
233
- key_mapping: nil,
234
- quote_char: '"',
235
- remove_empty_hashes: true,
236
- remove_empty_values: true,
237
- remove_unmapped_keys: false,
238
- remove_values_matching: nil,
239
- remove_zero_values: false,
240
- required_headers: nil,
241
- required_keys: nil,
242
- row_sep: :auto, # was: $/,
243
- silence_missing_keys: false,
244
- skip_lines: nil,
245
- strings_as_keys: false,
246
- strip_chars_from_headers: nil,
247
- strip_whitespace: true,
248
- user_provided_headers: nil,
249
- value_converters: nil,
250
- verbose: false,
251
- with_line_numbers: false,
252
- }
253
- end
254
-
255
- def readline_with_counts(filehandle, options)
256
- line = filehandle.readline(options[:row_sep])
257
- @file_line_count += 1
258
- @csv_line_count += 1
259
- line = remove_bom(line) if @csv_line_count == 1
260
- line
261
- end
262
-
263
- def skip_lines(filehandle, options)
264
- return unless options[:skip_lines].to_i > 0
265
-
266
- options[:skip_lines].to_i.times do
267
- readline_with_counts(filehandle, options)
268
- end
269
- end
270
-
271
- def rewind(filehandle)
272
- @file_line_count = 0
273
- @csv_line_count = 0
274
- filehandle.rewind
275
- end
276
-
277
- ###
278
- ### Thin wrapper around C-extension
279
- ###
280
- def parse(line, options, header_size = nil)
281
- # puts "SmarterCSV.parse OPTIONS: #{options[:acceleration]}" if options[:verbose]
282
-
283
- if options[:acceleration] && has_acceleration?
284
- # :nocov:
285
- has_quotes = line =~ /#{options[:quote_char]}/
286
- elements = parse_csv_line_c(line, options[:col_sep], options[:quote_char], header_size)
287
- elements.map!{|x| cleanup_quotes(x, options[:quote_char])} if has_quotes
288
- [elements, elements.size]
289
- # :nocov:
290
- else
291
- # puts "WARNING: SmarterCSV is using un-accelerated parsing of lines. Check options[:acceleration]"
292
- parse_csv_line_ruby(line, options, header_size)
293
- end
294
- end
295
-
296
- # ------------------------------------------------------------------
297
- # Ruby equivalent of the C-extension for parse_line
298
- #
299
- # parses a single line: either a CSV header and body line
300
- # - quoting rules compared to RFC-4180 are somewhat relaxed
301
- # - we are not assuming that quotes inside a fields need to be doubled
302
- # - we are not assuming that all fields need to be quoted (0 is even)
303
- # - works with multi-char col_sep
304
- # - if header_size is given, only up to header_size fields are parsed
305
- #
306
- # We use header_size for parsing the body lines to make sure we always match the number of headers
307
- # in case there are trailing col_sep characters in line
308
- #
309
- # Our convention is that empty fields are returned as empty strings, not as nil.
310
- #
311
- #
312
- # the purpose of the max_size parameter is to handle a corner case where
313
- # CSV lines contain more fields than the header.
314
- # In which case the remaining fields in the line are ignored
315
- #
316
- def parse_csv_line_ruby(line, options, header_size = nil)
317
- return [] if line.nil?
318
-
319
- line_size = line.size
320
- col_sep = options[:col_sep]
321
- col_sep_size = col_sep.size
322
- quote = options[:quote_char]
323
- quote_count = 0
324
- elements = []
325
- start = 0
326
- i = 0
327
-
328
- previous_char = ''
329
- while i < line_size
330
- if line[i...i+col_sep_size] == col_sep && quote_count.even?
331
- break if !header_size.nil? && elements.size >= header_size
332
-
333
- elements << cleanup_quotes(line[start...i], quote)
334
- previous_char = line[i]
335
- i += col_sep.size
336
- start = i
337
- else
338
- quote_count += 1 if line[i] == quote && previous_char != '\\'
339
- previous_char = line[i]
340
- i += 1
341
- end
342
- end
343
- elements << cleanup_quotes(line[start..-1], quote) if header_size.nil? || elements.size < header_size
344
- [elements, elements.size]
345
- end
32
+ Please contribute and create a pull request if you need this
33
+ -------------------------------------------------------------------------
346
34
 
347
- def cleanup_quotes(field, quote)
348
- return field if field.nil?
349
-
350
- # return if field !~ /#{quote}/ # this check can probably eliminated
351
-
352
- if field.start_with?(quote) && field.end_with?(quote)
353
- field.delete_prefix!(quote)
354
- field.delete_suffix!(quote)
355
- end
356
- field.gsub!("#{quote}#{quote}", quote)
357
- field
358
- end
359
-
360
- # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
361
- # and in the future we might also include UTF-8 space characters: https://www.compart.com/en/unicode/category/Zs
362
- BLANK_RE = /\A\s*\z/.freeze
363
-
364
- def blank?(value)
365
- case value
366
- when String
367
- value.empty? || BLANK_RE.match?(value)
368
-
369
- when NilClass
370
- true
371
-
372
- when Array
373
- value.empty? || value.inject(true){|result, x| result &&= elem_blank?(x)}
374
-
375
- when Hash
376
- value.empty? || value.values.inject(true){|result, x| result &&= elem_blank?(x)}
377
-
378
- else
379
- false
380
- end
381
- end
382
-
383
- def elem_blank?(value)
384
- case value
385
- when String
386
- value.empty? || BLANK_RE.match?(value)
387
-
388
- when NilClass
389
- true
390
-
391
- else
392
- false
393
- end
394
- end
395
-
396
- # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
397
- def only_or_except_limit_execution(options, option_name, key)
398
- if options[option_name].is_a?(Hash)
399
- if options[option_name].has_key?(:except)
400
- return true if Array(options[option_name][:except]).include?(key)
401
- elsif options[option_name].has_key?(:only)
402
- return true unless Array(options[option_name][:only]).include?(key)
403
- end
404
- end
405
- false
406
- end
407
-
408
- # If file has headers, then guesses column separator from headers.
409
- # Otherwise guesses column separator from contents.
410
- # Raises exception if none is found.
411
- def guess_column_separator(filehandle, options)
412
- skip_lines(filehandle, options)
413
-
414
- delimiters = [',', "\t", ';', ':', '|']
415
-
416
- line = nil
417
- has_header = options[:headers_in_file]
418
- candidates = Hash.new(0)
419
- count = has_header ? 1 : 5
420
- count.times do
421
- line = readline_with_counts(filehandle, options)
422
- delimiters.each do |d|
423
- candidates[d] += line.scan(d).count
424
- end
425
- rescue EOFError # short files
426
- break
427
- end
428
- rewind(filehandle)
429
-
430
- if candidates.values.max == 0
431
- # if the header only contains
432
- return ',' if line.chomp(options[:row_sep]) =~ /^\w+$/
433
-
434
- raise SmarterCSV::NoColSepDetected
435
- end
436
-
437
- candidates.key(candidates.values.max)
438
- end
439
-
440
- # limitation: this currently reads the whole file in before making a decision
441
- def guess_line_ending(filehandle, options)
442
- counts = {"\n" => 0, "\r" => 0, "\r\n" => 0}
443
- quoted_char = false
444
-
445
- # count how many of the pre-defined line-endings we find
446
- # ignoring those contained within quote characters
447
- last_char = nil
448
- lines = 0
449
- filehandle.each_char do |c|
450
- quoted_char = !quoted_char if c == options[:quote_char]
451
- next if quoted_char
452
-
453
- if last_char == "\r"
454
- if c == "\n"
455
- counts["\r\n"] += 1
456
- else
457
- counts["\r"] += 1 # \r are counted after they appeared
458
- end
459
- elsif c == "\n"
460
- counts["\n"] += 1
461
- end
462
- last_char = c
463
- lines += 1
464
- break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
465
- end
466
- rewind(filehandle)
467
-
468
- counts["\r"] += 1 if last_char == "\r"
469
- # find the most frequent key/value pair:
470
- most_frequent_key, _count = counts.max_by{|_, v| v}
471
- most_frequent_key
472
- end
473
-
474
- def process_headers(filehandle, options)
475
- @raw_header = nil
476
- @headers = nil
477
- if options[:headers_in_file] # extract the header line
478
- # process the header line in the CSV file..
479
- # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
480
- header = readline_with_counts(filehandle, options)
481
- @raw_header = header
482
-
483
- header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
484
- header = header.sub(options[:comment_regexp], '') if options[:comment_regexp]
485
- header = header.chomp(options[:row_sep])
486
-
487
- header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
488
-
489
- file_headerA, file_header_size = parse(header, options)
490
-
491
- file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
492
- file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
493
-
494
- unless options[:keep_original_headers]
495
- file_headerA.map!{|x| x.gsub(/\s+|-+/, '_')}
496
- file_headerA.map!{|x| x.downcase} if options[:downcase_header]
497
- end
498
- else
499
- raise SmarterCSV::IncorrectOption, "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
500
- end
501
- if options[:user_provided_headers] && options[:user_provided_headers].class == Array && !options[:user_provided_headers].empty?
502
- # use user-provided headers
503
- headerA = options[:user_provided_headers]
504
- if defined?(file_header_size) && !file_header_size.nil?
505
- if headerA.size != file_header_size
506
- raise SmarterCSV::HeaderSizeMismatch, "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file has #{file_header_size} headers"
507
- else
508
- # we could print out the mapping of file_headerA to headerA here
509
- end
510
- end
511
- else
512
- headerA = file_headerA
513
- end
514
-
515
- # detect duplicate headers and disambiguate
516
- headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
517
- header_size = headerA.size # used for splitting lines
518
-
519
- headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
520
-
521
- unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
522
- key_mappingH = options[:key_mapping]
523
-
524
- # do some key mapping on the keys in the file header
525
- # if you want to completely delete a key, then map it to nil or to ''
526
- if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
527
- # if silence_missing_keys are not set, raise error if missing header
528
- missing_keys = key_mappingH.keys - headerA
529
- # if the user passes a list of speciffic mapped keys that are optional
530
- missing_keys -= options[:silence_missing_keys] if options[:silence_missing_keys].is_a?(Array)
531
-
532
- unless missing_keys.empty? || options[:silence_missing_keys] == true
533
- raise SmarterCSV::KeyMappingError, "ERROR: can not map headers: #{missing_keys.join(', ')}"
534
- end
535
-
536
- headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
537
- end
538
- end
539
-
540
- # header_validations
541
- duplicate_headers = []
542
- headerA.compact.each do |k|
543
- duplicate_headers << k if headerA.select{|x| x == k}.size > 1
544
- end
545
-
546
- unless options[:user_provided_headers] || duplicate_headers.empty?
547
- raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
548
- end
549
-
550
- # deprecate required_headers
551
- unless options[:required_headers].nil?
552
- puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'"
553
- if options[:required_keys].nil?
554
- options[:required_keys] = options[:required_headers]
555
- options[:required_headers] = nil
556
- end
557
- end
558
-
559
- if options[:required_keys] && options[:required_keys].is_a?(Array)
560
- missing_keys = []
561
- options[:required_keys].each do |k|
562
- missing_keys << k unless headerA.include?(k)
563
- end
564
- raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
565
- end
566
-
567
- @headers = headerA
568
- [headerA, header_size]
569
- end
570
-
571
- def process_duplicate_headers(headers, options)
572
- counts = Hash.new(0)
573
- result = []
574
- headers.each do |key|
575
- counts[key] += 1
576
- if counts[key] == 1
577
- result << key
578
- else
579
- result << [key, options[:duplicate_header_suffix], counts[key]].join
580
- end
581
- end
582
- result
583
- end
584
-
585
- private
586
-
587
- UTF_32_BOM = %w[0 0 fe ff].freeze
588
- UTF_32LE_BOM = %w[ff fe 0 0].freeze
589
- UTF_8_BOM = %w[ef bb bf].freeze
590
- UTF_16_BOM = %w[fe ff].freeze
591
- UTF_16LE_BOM = %w[ff fe].freeze
592
-
593
- def remove_bom(str)
594
- str_as_hex = str.bytes.map{|x| x.to_s(16)}
595
- # if string does not start with one of the bytes, there is no BOM
596
- return str unless %w[ef fe ff 0].include?(str_as_hex[0])
597
-
598
- return str.byteslice(4..-1) if [UTF_32_BOM, UTF_32LE_BOM].include?(str_as_hex[0..3])
599
- return str.byteslice(3..-1) if str_as_hex[0..2] == UTF_8_BOM
600
- return str.byteslice(2..-1) if [UTF_16_BOM, UTF_16LE_BOM].include?(str_as_hex[0..1])
601
-
602
- puts "SmarterCSV found unhandled BOM! #{str.chars[0..7].inspect}"
603
- str
604
- end
605
-
606
- def validate_options!(options)
607
- keys = options.keys
608
- errors = []
609
- errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
610
- errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
611
- errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
612
- raise SmarterCSV::ValidationError, errors.inspect if errors.any?
613
- end
614
-
615
- def option_valid?(str)
616
- return true if str.is_a?(Symbol) && str == :auto
617
- return true if str.is_a?(String) && !str.empty?
618
-
619
- false
620
- end
621
- end
35
+ BLOCK_COMMENT
622
36
  end
37
+ # :nocov:
38
+ require "smarter_csv/smarter_csv"
39
+
data/smarter_csv.gemspec CHANGED
@@ -31,8 +31,9 @@ Gem::Specification.new do |spec|
31
31
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
32
32
 
33
33
  spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
34
- spec.require_paths = ["lib"] # add ext here?
34
+ spec.require_paths = %w[lib ext]
35
35
  spec.extensions = ["ext/smarter_csv/extconf.rb"]
36
+ spec.files += Dir.glob("ext/smarter_csv/**/*")
36
37
 
37
38
  spec.add_development_dependency "awesome_print"
38
39
  spec.add_development_dependency "codecov"