smarter_csv 1.1.5 → 1.12.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +5 -5
  2. data/.rspec +1 -2
  3. data/.rubocop.yml +154 -0
  4. data/CHANGELOG.md +364 -0
  5. data/CONTRIBUTORS.md +56 -0
  6. data/Gemfile +7 -2
  7. data/LICENSE.txt +21 -0
  8. data/README.md +44 -441
  9. data/Rakefile +39 -19
  10. data/TO_DO_v2.md +14 -0
  11. data/docs/_introduction.md +56 -0
  12. data/docs/basic_api.md +157 -0
  13. data/docs/batch_processing.md +68 -0
  14. data/docs/data_transformations.md +50 -0
  15. data/docs/examples.md +75 -0
  16. data/docs/header_transformations.md +113 -0
  17. data/docs/header_validations.md +36 -0
  18. data/docs/options.md +98 -0
  19. data/docs/row_col_sep.md +104 -0
  20. data/docs/value_converters.md +68 -0
  21. data/ext/smarter_csv/extconf.rb +14 -0
  22. data/ext/smarter_csv/smarter_csv.c +97 -0
  23. data/lib/smarter_csv/auto_detection.rb +78 -0
  24. data/lib/smarter_csv/errors.rb +16 -0
  25. data/lib/smarter_csv/file_io.rb +50 -0
  26. data/lib/smarter_csv/hash_transformations.rb +91 -0
  27. data/lib/smarter_csv/header_transformations.rb +63 -0
  28. data/lib/smarter_csv/header_validations.rb +34 -0
  29. data/lib/smarter_csv/headers.rb +68 -0
  30. data/lib/smarter_csv/options.rb +95 -0
  31. data/lib/smarter_csv/parser.rb +90 -0
  32. data/lib/smarter_csv/reader.rb +243 -0
  33. data/lib/smarter_csv/version.rb +3 -1
  34. data/lib/smarter_csv/writer.rb +116 -0
  35. data/lib/smarter_csv.rb +91 -3
  36. data/smarter_csv.gemspec +43 -20
  37. metadata +122 -137
  38. data/.gitignore +0 -8
  39. data/.travis.yml +0 -19
  40. data/lib/extensions/hash.rb +0 -7
  41. data/lib/smarter_csv/smarter_csv.rb +0 -281
  42. data/spec/fixtures/basic.csv +0 -8
  43. data/spec/fixtures/binary.csv +0 -1
  44. data/spec/fixtures/carriage_returns_n.csv +0 -18
  45. data/spec/fixtures/carriage_returns_quoted.csv +0 -3
  46. data/spec/fixtures/carriage_returns_r.csv +0 -1
  47. data/spec/fixtures/carriage_returns_rn.csv +0 -18
  48. data/spec/fixtures/chunk_cornercase.csv +0 -10
  49. data/spec/fixtures/empty.csv +0 -5
  50. data/spec/fixtures/line_endings_n.csv +0 -4
  51. data/spec/fixtures/line_endings_r.csv +0 -1
  52. data/spec/fixtures/line_endings_rn.csv +0 -4
  53. data/spec/fixtures/lots_of_columns.csv +0 -2
  54. data/spec/fixtures/malformed.csv +0 -3
  55. data/spec/fixtures/malformed_header.csv +0 -3
  56. data/spec/fixtures/money.csv +0 -3
  57. data/spec/fixtures/no_header.csv +0 -7
  58. data/spec/fixtures/numeric.csv +0 -5
  59. data/spec/fixtures/pets.csv +0 -5
  60. data/spec/fixtures/quoted.csv +0 -5
  61. data/spec/fixtures/separator.csv +0 -4
  62. data/spec/fixtures/skip_lines.csv +0 -8
  63. data/spec/fixtures/valid_unicode.csv +0 -5
  64. data/spec/fixtures/with_dashes.csv +0 -8
  65. data/spec/fixtures/with_dates.csv +0 -4
  66. data/spec/smarter_csv/binary_file2_spec.rb +0 -24
  67. data/spec/smarter_csv/binary_file_spec.rb +0 -22
  68. data/spec/smarter_csv/carriage_return_spec.rb +0 -170
  69. data/spec/smarter_csv/chunked_reading_spec.rb +0 -14
  70. data/spec/smarter_csv/close_file_spec.rb +0 -15
  71. data/spec/smarter_csv/column_separator_spec.rb +0 -11
  72. data/spec/smarter_csv/convert_values_to_numeric_spec.rb +0 -48
  73. data/spec/smarter_csv/extenstions_spec.rb +0 -17
  74. data/spec/smarter_csv/header_transformation_spec.rb +0 -21
  75. data/spec/smarter_csv/keep_headers_spec.rb +0 -24
  76. data/spec/smarter_csv/key_mapping_spec.rb +0 -25
  77. data/spec/smarter_csv/line_ending_spec.rb +0 -43
  78. data/spec/smarter_csv/load_basic_spec.rb +0 -20
  79. data/spec/smarter_csv/malformed_spec.rb +0 -21
  80. data/spec/smarter_csv/no_header_spec.rb +0 -24
  81. data/spec/smarter_csv/not_downcase_header_spec.rb +0 -24
  82. data/spec/smarter_csv/quoted_spec.rb +0 -23
  83. data/spec/smarter_csv/remove_empty_values_spec.rb +0 -13
  84. data/spec/smarter_csv/remove_keys_from_hashes_spec.rb +0 -25
  85. data/spec/smarter_csv/remove_not_mapped_keys_spec.rb +0 -35
  86. data/spec/smarter_csv/remove_values_matching_spec.rb +0 -26
  87. data/spec/smarter_csv/remove_zero_values_spec.rb +0 -25
  88. data/spec/smarter_csv/skip_lines_spec.rb +0 -29
  89. data/spec/smarter_csv/strings_as_keys_spec.rb +0 -24
  90. data/spec/smarter_csv/strip_chars_from_headers_spec.rb +0 -24
  91. data/spec/smarter_csv/valid_unicode_spec.rb +0 -94
  92. data/spec/smarter_csv/value_converters_spec.rb +0 -52
  93. data/spec/spec/spec_helper.rb +0 -17
  94. data/spec/spec.opts +0 -2
  95. data/spec/spec_helper.rb +0 -21
@@ -0,0 +1,95 @@
# frozen_string_literal: true

module SmarterCSV
  #
  # NOTE: this is not called when "parse" methods are tested by themselves
  #
  # ONLY FOR BACKWARDS-COMPATIBILITY
  #
  # Returns the frozen DEFAULT_OPTIONS hash.
  def self.default_options
    Options::DEFAULT_OPTIONS
  end

  # Mixin that merges user-supplied options with DEFAULT_OPTIONS and validates
  # the separators and the quote character.
  module Options
    DEFAULT_OPTIONS = {
      acceleration: true, # if user wants to use acceleration or not
      auto_row_sep_chars: 500,
      chunk_size: nil,
      col_sep: :auto, # was: ',',
      comment_regexp: nil, # was: /\A#/,
      convert_values_to_numeric: true,
      downcase_header: true,
      duplicate_header_suffix: '', # was: nil,
      file_encoding: 'utf-8',
      force_simple_split: false,
      force_utf8: false,
      headers_in_file: true,
      invalid_byte_sequence: '',
      keep_original_headers: false,
      key_mapping: nil,
      quote_char: '"',
      remove_empty_hashes: true,
      remove_empty_values: true,
      remove_unmapped_keys: false,
      remove_values_matching: nil,
      remove_zero_values: false,
      required_headers: nil,
      required_keys: nil,
      row_sep: :auto, # was: $/,
      silence_missing_keys: false,
      skip_lines: nil,
      strings_as_keys: false,
      strip_chars_from_headers: nil,
      strip_whitespace: true,
      user_provided_headers: nil,
      value_converters: nil,
      verbose: false,
      with_line_numbers: false,
    }.freeze

    # NOTE: this is not called when "parse" methods are tested by themselves
    #
    # Merges +given_options+ over DEFAULT_OPTIONS, stores the result in
    # @options, validates it and returns it.
    #
    # @param given_options [Hash, nil] user-supplied options; nil is treated as {}
    # @return [Hash] the effective (unfrozen) options hash
    # @raise [SmarterCSV::ValidationError] if row_sep, col_sep or quote_char is invalid
    def process_options(given_options = {})
      given_options ||= {} # an explicit nil must not crash the lookups below

      puts "User provided options:\n#{pp(given_options)}\n" if given_options[:verbose]

      # dup first: DEFAULT_OPTIONS is frozen and must never be modified
      @options = DEFAULT_OPTIONS.dup.merge!(given_options)

      # fix invalid input
      @options[:invalid_byte_sequence] ||= ''

      puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose]

      validate_options!(@options)
      @options
    end

    private

    # Migrates the deprecated :required_headers option to :required_keys (in place)
    # and checks that the separators and the quote character are usable.
    def validate_options!(options)
      # deprecate required_headers
      unless options[:required_headers].nil?
        puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'"
        if options[:required_keys].nil?
          options[:required_keys] = options[:required_headers]
          options[:required_headers] = nil
        end
      end

      errors = []
      errors << "invalid row_sep" if options.key?(:row_sep) && !option_valid?(options[:row_sep])
      errors << "invalid col_sep" if options.key?(:col_sep) && !option_valid?(options[:col_sep])
      errors << "invalid quote_char" if options.key?(:quote_char) && !option_valid?(options[:quote_char])
      raise SmarterCSV::ValidationError, errors.inspect if errors.any?
    end

    # A separator / quote option is valid when it is the Symbol :auto or a non-empty String.
    def option_valid?(str)
      return true if str.is_a?(Symbol) && str == :auto
      return true if str.is_a?(String) && !str.empty?

      false
    end

    # Renders +value+ for the verbose output; uses AwesomePrint when it is loaded.
    # NOTE: within including classes this shadows Kernel#pp and returns a String
    # instead of printing.
    def pp(value)
      defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
    end
  end
end
@@ -0,0 +1,90 @@
# frozen_string_literal: true

module SmarterCSV
  # Line-level CSV parsing. Expects the including object to respond to
  # +has_acceleration+ (used by #parse to decide whether the C-extension is used).
  module Parser
    protected

    ###
    ### Thin wrapper around C-extension
    ###
    # Splits +line+ into its fields.
    #
    # @param line [String] one CSV line, without the trailing row separator
    # @param options [Hash] reads :acceleration, :col_sep and :quote_char
    # @param header_size [Integer, nil] if given, at most this many fields are returned
    # @return [Array(Array<String>, Integer)] the fields and their count
    def parse(line, options, header_size = nil)
      # puts "SmarterCSV.parse OPTIONS: #{options[:acceleration]}" if options[:verbose]

      if options[:acceleration] && has_acceleration
        # :nocov:
        # Plain substring test: interpolating quote_char into a Regexp would
        # misbehave (or raise) when the quote character is a regexp metacharacter.
        has_quotes = line.include?(options[:quote_char])
        elements = parse_csv_line_c(line, options[:col_sep], options[:quote_char], header_size)
        elements.map! { |x| cleanup_quotes(x, options[:quote_char]) } if has_quotes
        [elements, elements.size]
        # :nocov:
      else
        # puts "WARNING: SmarterCSV is using un-accelerated parsing of lines. Check options[:acceleration]"
        parse_csv_line_ruby(line, options, header_size)
      end
    end

    # ------------------------------------------------------------------
    # Ruby equivalent of the C-extension for parse_line
    #
    # parses a single line: either a CSV header and body line
    # - quoting rules compared to RFC-4180 are somewhat relaxed
    # - we are not assuming that quotes inside a fields need to be doubled
    # - we are not assuming that all fields need to be quoted (0 is even)
    # - works with multi-char col_sep
    # - if header_size is given, only up to header_size fields are parsed
    #
    # We use header_size for parsing the body lines to make sure we always match the number of headers
    # in case there are trailing col_sep characters in line
    #
    # Our convention is that empty fields are returned as empty strings, not as nil.
    #
    # The purpose of the header_size parameter is also to handle a corner case where
    # CSV lines contain more fields than the header.
    # In which case the remaining fields in the line are ignored
    #
    # @return [Array(Array<String>, Integer)] the fields and their count;
    #   a nil line yields [[], 0] so callers can always destructure the result
    def parse_csv_line_ruby(line, options, header_size = nil)
      return [[], 0] if line.nil?

      line_size = line.size
      col_sep = options[:col_sep]
      col_sep_size = col_sep.size
      quote = options[:quote_char]
      quote_count = 0
      elements = []
      start = 0
      i = 0

      previous_char = ''
      while i < line_size
        # a separator only counts when we are outside of a quoted section
        if line[i...i + col_sep_size] == col_sep && quote_count.even?
          break if !header_size.nil? && elements.size >= header_size

          elements << cleanup_quotes(line[start...i], quote)
          previous_char = line[i]
          i += col_sep_size
          start = i
        else
          # quote characters preceded by a backslash are not counted
          quote_count += 1 if line[i] == quote && previous_char != '\\'
          previous_char = line[i]
          i += 1
        end
      end
      # the remainder after the last separator is the final field
      elements << cleanup_quotes(line[start..-1], quote) if header_size.nil? || elements.size < header_size
      [elements, elements.size]
    end

    # Strips one pair of enclosing quote characters (if present) and collapses
    # doubled quote characters into single ones.
    # NOTE: modifies +field+ in place and returns it; nil is passed through.
    def cleanup_quotes(field, quote)
      return field if field.nil?

      # return if field !~ /#{quote}/ # this check can probably eliminated

      if field.start_with?(quote) && field.end_with?(quote)
        field.delete_prefix!(quote)
        field.delete_suffix!(quote)
      end
      field.gsub!("#{quote}#{quote}", quote)
      field
    end
  end
end
@@ -0,0 +1,243 @@
# frozen_string_literal: true

module SmarterCSV
  # Reads CSV input and turns every data line into a Hash that is keyed by the
  # processed headers. Results are either collected in @result or yielded to a block,
  # optionally grouped into chunks of options[:chunk_size] hashes.
  class Reader
    include ::SmarterCSV::Options
    include ::SmarterCSV::FileIO
    include ::SmarterCSV::AutoDetection
    include ::SmarterCSV::Headers
    include ::SmarterCSV::HeaderTransformations
    include ::SmarterCSV::HeaderValidations
    include ::SmarterCSV::HashTransformations
    include ::SmarterCSV::Parser

    attr_reader :input, :options
    attr_reader :csv_line_count, :chunk_count, :file_line_count
    attr_reader :enforce_utf8, :has_rails, :has_acceleration
    attr_reader :errors, :warnings, :headers, :raw_header, :result

    # :nocov:
    # rubocop:disable Naming/MethodName
    # Deprecated alias for #headers.
    def headerA
      warn "Deprecation Warning: 'headerA' will be removed in future versions. Use 'headers'"
      @headerA
    end
    # rubocop:enable Naming/MethodName
    # :nocov:

    # first parameter: filename or input object which responds to readline method
    # second parameter: Hash of options, merged over the defaults by process_options
    def initialize(input, given_options = {})
      @input = input
      @has_rails = !!defined?(Rails)
      @csv_line_count = 0
      @chunk_count = 0
      @errors = {}
      @file_line_count = 0
      @headerA = []
      @headers = nil
      @raw_header = nil # header as it appears in the file
      @result = []
      @warnings = {}
      @enforce_utf8 = false # only set to true if needed (after options parsing)
      @options = process_options(given_options)
      # true if it is compiled with acceleration
      @has_acceleration = !!SmarterCSV::Parser.respond_to?(:parse_csv_line_c)
    end

    # Processes the whole input.
    #
    # With a block: yields Arrays of hashes (one chunk, or a one-element Array when
    # chunking is off) and returns the number of chunks processed.
    # Without a block: returns @result - an Array of Hashes, or an Array of Arrays
    # of Hashes in chunked mode.
    #
    # The input handle is closed when processing ends, also when an error is raised.
    def process(&block) # rubocop:disable Lint/UnusedMethodArgument
      @enforce_utf8 = options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
      @verbose = options[:verbose]

      begin
        fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")

        if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) &&
           (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') ||
            fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8'))
          puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
        end

        # auto-detect the row separator
        options[:row_sep] = guess_line_ending(fh, options) if options[:row_sep]&.to_sym == :auto
        # attempt to auto-detect column separator
        options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep]&.to_sym == :auto

        skip_lines(fh, options)

        @headers, header_size = process_headers(fh, options)
        @headerA = @headers # @headerA is deprecated, use @headers

        puts "Effective headers:\n#{pp(@headers)}\n" if @verbose

        header_validations(@headers, options)

        # in case we use chunking.. we'll need to set it up..
        if options[:chunk_size].to_i > 0
          use_chunks = true
          chunk_size = options[:chunk_size].to_i
          @chunk_count = 0
          chunk = []
        else
          use_chunks = false
        end

        # now on to processing all the rest of the lines in the CSV file:
        # fh.each_line |line|
        until fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
          line = readline_with_counts(fh, options)

          # replace invalid byte sequence in UTF-8 with question mark to avoid errors
          line = enforce_utf8_encoding(line, options) if @enforce_utf8

          print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if @verbose

          next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any

          # cater for the quoted csv data containing the row separator carriage return character
          # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
          # by detecting the existence of an uneven number of quote characters
          multiline = count_quote_chars(line, options[:quote_char]).odd?

          # NOTE(review): if the very last line of the input has an uneven number of
          # quote chars, the readline below is reached at EOF - presumably raising
          # EOFError. Confirm whether callers rely on that before guarding it.
          while multiline
            next_line = fh.readline(options[:row_sep])
            next_line = enforce_utf8_encoding(next_line, options) if @enforce_utf8
            line += next_line
            @file_line_count += 1

            break if fh.eof? # Exit loop if end of file is reached

            multiline = count_quote_chars(line, options[:quote_char]).odd?
          end

          # :nocov:
          # multiline is only still true here when the loop above was left via the EOF break
          if multiline && @verbose
            print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count
          end
          # :nocov:

          line.chomp!(options[:row_sep])

          # --- SPLIT LINE & DATA TRANSFORMATIONS ------------------------------------------------------------
          dataA, _data_size = parse(line, options, header_size)

          dataA.map!(&:strip) if options[:strip_whitespace]

          # if all values are blank, then ignore this line
          next if options[:remove_empty_hashes] && (dataA.empty? || blank?(dataA))

          # --- HASH TRANSFORMATIONS ------------------------------------------------------------
          hash = @headers.zip(dataA).to_h

          hash = hash_transformations(hash, options)

          # --- HASH VALIDATIONS ----------------------------------------------------------------
          # will go here, and be able to:
          #  - validate correct format of the values for fields
          #  - required fields to be non-empty
          #  - ...
          # -------------------------------------------------------------------------------------

          next if options[:remove_empty_hashes] && hash.empty?

          puts "CSV Line #{@file_line_count}: #{pp(hash)}" if @verbose == '2' # very verbose setting
          # optional adding of csv_line_number to the hash to help debugging
          hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]

          # process the chunks or the resulting hash
          if use_chunks
            chunk << hash # append temp result to chunk

            if chunk.size >= chunk_size || fh.eof? # if chunk if full, or EOF reached
              # do something with the chunk
              if block_given?
                yield chunk # do something with the hashes in the chunk in the block
              else
                @result << chunk.dup # Append chunk to result (use .dup to keep a copy after we do chunk.clear)
              end
              @chunk_count += 1
              chunk.clear # re-initialize for next chunk of data
            else
              # the last chunk may contain partial data, which is handled below
            end
            # while a chunk is being filled up we don't need to do anything else here

          else # no chunk handling
            if block_given?
              yield [hash] # do something with the hash in the block (better to use chunking here)
            else
              @result << hash
            end
          end
        end

        # print new line to retain last processing line message
        print "\n" if @verbose

        # handling of last chunk:
        # (chunk is nil when chunking is off; it can be non-empty here when trailing
        # lines were skipped after the last hash was appended)
        if !chunk.nil? && chunk.size > 0
          # do something with the chunk
          if block_given?
            yield chunk # do something with the hashes in the chunk in the block
          else
            @result << chunk.dup # Append chunk to result (use .dup to keep a copy after we do chunk.clear)
          end
          @chunk_count += 1
          # chunk = [] # initialize for next chunk of data
        end
      ensure
        # fh is nil when opening the input failed; nil does not respond to close
        fh.close if fh.respond_to?(:close)
      end

      if block_given?
        @chunk_count # when we do processing through a block we only care how many chunks we processed
      else
        @result # returns either an Array of Hashes, or an Array of Arrays of Hashes (if in chunked mode)
      end
    end

    # Counts the occurrences of +quote_char+ in +line+ that are not escaped by a backslash.
    # A backslash escapes exactly the next character (so an escaped backslash does
    # not escape what follows it). Comparison is per character, so only a
    # single-character quote_char can match.
    # Returns 0 for a nil line or a nil/empty quote_char.
    def count_quote_chars(line, quote_char)
      return 0 if line.nil? || quote_char.nil? || quote_char.empty?

      count = 0
      escaped = false

      line.each_char do |char|
        if char == '\\' && !escaped
          escaped = true
        else
          count += 1 if char == quote_char && !escaped
          escaped = false
        end
      end

      count
    end

    protected

    # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
    # and in the future we might also include UTF-8 space characters: https://www.compart.com/en/unicode/category/Zs
    BLANK_RE = /\A\s*\z/.freeze

    # true for nil, for whitespace-only Strings, and for Arrays / Hashes whose
    # elements / values are all blank (recursively); false for anything else.
    def blank?(value)
      case value
      when String
        BLANK_RE.match?(value)
      when NilClass
        true
      when Array
        value.all? { |elem| blank?(elem) }
      when Hash
        value.values.all? { |elem| blank?(elem) } # Focus on values only
      else
        false
      end
    end

    private

    # Re-labels +line+ as UTF-8 (in place) and replaces invalid or undefined
    # byte sequences with options[:invalid_byte_sequence].
    def enforce_utf8_encoding(line, options)
      # return line unless options[:force_utf8] || options[:file_encoding] !~ /utf-8/i

      line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence])
    end
  end
end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SmarterCSV
2
- VERSION = "1.1.5"
4
+ VERSION = "1.12.1"
3
5
  end
@@ -0,0 +1,116 @@
# frozen_string_literal: true

require 'tempfile'

module SmarterCSV
  #
  # Generate CSV files
  #
  # Create an instance of the Writer class with the filename and options.
  # call `<<` one or multiple times to append data to the file.
  # call `finalize` to save the file.
  #
  # The `<<` method can take different arguments:
  #  * a single Hash
  #  * an array of Hashes
  #  * nested arrays of arrays of Hashes
  #
  # By default SmarterCSV::Writer automatically discovers all headers that are present
  # in the data on-the-fly. This can be disabled, then only given headers are used.
  # Disabling can be useful when you want to select attributes from hashes, or ActiveRecord instances.
  #
  # If `discover_headers` is enabled, and headers are given, any new headers that are found in the data will still be appended.
  #
  # The Writer automatically quotes fields containing the col_sep, row_sep, or the quote_char.
  # Quote characters inside a quoted field are doubled.
  #
  # Options:
  #   col_sep : defaults to , but can be set to any other character
  #   row_sep : defaults to the value of $/ , but can be set to \n or \r\n or \r or anything else
  #   quote_char : defaults to "
  #   discover_headers : defaults to true
  #   headers : defaults to []
  #   force_quotes: defaults to false
  #   map_headers: defaults to {}, can be a hash of key -> value mappings
  #
  # IMPORTANT NOTES:
  #  * Data hashes could contain strings or symbols as keys.
  #    Make sure to use the correct form when specifying headers manually,
  #    in combination with the :discover_headers option
  #  * Each row is written with the headers known at that time; headers that are
  #    discovered later do not extend rows that were already written.
  #
  class Writer
    attr_reader :options, :row_sep, :col_sep, :quote_char, :force_quotes, :discover_headers, :headers, :map_headers, :output_file

    # @param file_path [String] path of the CSV file to create (opened with mode 'w+')
    # @param options [Hash] see the class documentation
    def initialize(file_path, options = {})
      @options = options

      @row_sep = options[:row_sep] || $/
      @col_sep = options[:col_sep] || ','
      @quote_char = options[:quote_char] || '"'
      @force_quotes = options[:force_quotes] == true
      @discover_headers =
        if options.key?(:discover_headers)
          # passing in the option overrides the default behavior
          options[:discover_headers] == true
        else
          # disable discover_headers when headers are given explicitly
          !(options.key?(:map_headers) || options.key?(:headers))
        end
      @map_headers = options[:map_headers] || {}
      @headers =
        if options.key?(:headers)
          # copy, because discovered headers get appended to this array
          (options[:headers] || []).dup
        elsif options.key?(:map_headers)
          @map_headers.keys
        else
          [] # start with empty headers
        end

      @output_file = File.open(file_path, 'w+')
      # hidden state:
      # rows are buffered in a temp file (in the default temp directory), because
      # the header line is only complete once all data has been seen
      @temp_file = Tempfile.new('tempfile')
      @quote_regex = Regexp.union(@col_sep, @row_sep, @quote_char)
    end

    # this can be called many times in order to append lines to the csv file
    #
    # @param data [Hash, Array, nil] a Hash, or (nested) Arrays of Hashes; nil is ignored
    # @return [self] to allow chaining: writer << a << b
    # @raise [InvalidInputData] for any other kind of data
    def <<(data)
      case data
      when Hash
        process_hash(data)
      when Array
        data.each { |item| self << item }
      when NilClass
        # ignore
      else
        raise InvalidInputData, "Invalid data type: #{data.class}. Must be a Hash or an Array."
      end
      self
    end

    # Writes the header line followed by all buffered rows to the output file,
    # then closes the output file and removes the temp file (also on errors).
    def finalize
      # Map headers if :map_headers option is provided
      mapped_headers = @headers.map { |header| @map_headers[header] || header }
      # headers are only quoted when they need it, never because of :force_quotes
      header_line = mapped_headers.map { |header| escape_csv_field(header, false) }.join(@col_sep)

      @temp_file.rewind
      @output_file.write(header_line + @row_sep)
      @output_file.write(@temp_file.read)
      @output_file.flush
    ensure
      @output_file.close
      @temp_file.close! # close and unlink
    end

    private

    # Appends one row for +hash+ to the temp file.
    def process_hash(hash)
      if @discover_headers
        new_keys = hash.keys - @headers
        @headers.concat(new_keys)
      end

      # Reorder the hash to match the current headers order and fill missing fields;
      # only nil becomes '' - a false value must be written as "false"
      ordered_row = @headers.map do |header|
        value = hash[header]
        value.nil? ? '' : value
      end

      @temp_file.write(ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + @row_sep)
    end

    # Returns +field+ as a String, wrapped in quote_char when +force+ is set or when
    # it contains the col_sep, the row_sep or the quote_char.
    # Embedded quote characters are doubled.
    def escape_csv_field(field, force = @force_quotes)
      text = field.to_s
      return text unless force || text.match?(@quote_regex)

      # block form, so that the replacement is never interpreted as a back-reference
      "#{@quote_char}#{text.gsub(@quote_char) { @quote_char * 2 }}#{@quote_char}"
    end
  end
end
data/lib/smarter_csv.rb CHANGED
@@ -1,4 +1,92 @@
1
- require 'csv'
1
+ # frozen_string_literal: true
2
+
2
3
  require "smarter_csv/version"
3
- require "extensions/hash.rb"
4
- require "smarter_csv/smarter_csv.rb"
4
+ require "smarter_csv/errors"
5
+
6
+ require "smarter_csv/file_io"
7
+ require "smarter_csv/options"
8
+ require "smarter_csv/auto_detection"
9
+ require 'smarter_csv/header_transformations'
10
+ require 'smarter_csv/header_validations'
11
+ require "smarter_csv/headers"
12
+ require "smarter_csv/hash_transformations"
13
+
14
+ require "smarter_csv/parser"
15
+ require "smarter_csv/writer"
16
+ require "smarter_csv/reader"
17
+
# load the C-extension:
#
# Loading is best-effort: when the extension cannot be loaded, the failure is
# ignored here and nothing else is attempted.
require 'rbconfig'

case RUBY_ENGINE
when 'ruby'
  begin
    # Ask RbConfig for the host OS instead of shelling out to `uname -s`:
    # no sub-process at require time, and it also works where `uname` does not exist.
    if RbConfig::CONFIG['host_os'].to_s.downcase.include?('darwin')
      #
      # Please report if you see cases where the rake-compiler is building x86_64 code on arm64 cpus:
      # https://github.com/rake-compiler/rake-compiler/issues/231
      #
      require 'smarter_csv/smarter_csv.bundle'
    else
      # :nocov:
      require_relative "smarter_csv/smarter_csv"
      # :nocov:
    end
  rescue LoadError, StandardError
    # Extension not available. LoadError is a ScriptError and has to be named
    # explicitly; Interrupt / SystemExit are deliberately NOT swallowed.
    # require_relative 'smarter_csv/smarter_csv'
  end
  # :nocov:
  # when 'truffleruby'
  #   puts "\n\n truffleruby case in the load path | RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}\n\n"
  #   # this might not work - if you encounter problems, please contribute and create a PR
  #   # require 'truffleruby/smarter_csv'
else
  puts <<-BLOCK_COMMENT

-------------------------------------------------------------------------
RUBY_ENGINE: #{RUBY_ENGINE} , #{RUBY_VERSION}

Acceleration via C-Extension is currently not supported for #{RUBY_ENGINE}

Please contribute and create a pull request if you need this
-------------------------------------------------------------------------

  BLOCK_COMMENT
end
# :nocov:
55
+
module SmarterCSV
  # For backwards compatibility:
  #
  # while `SmarterCSV.process` works for simple cases, you can't get access to the internal state any longer.
  # e.g. you need the instance of the Reader to access the original headers
  #
  # Please use this instead:
  #
  #   reader = SmarterCSV::Reader.new(input, options)
  #   reader.process # with or without block
  #
  # Returns whatever Reader#process returns.
  def self.process(input, given_options = {}, &block)
    reader = Reader.new(input, given_options)
    reader.process(&block)
  end

  # Convenience method for generating CSV files:
  #
  #   SmarterCSV.generate(filename, options) do |csv_writer|
  #     MyModel.find_in_batches(batch_size: 100) do |batch|
  #       batch.pluck(:name, :description, :instructor).each do |record|
  #         csv_writer << record
  #       end
  #     end
  #   end
  #
  # Yields a Writer for +filename+ and finalizes it when the block ends, also
  # when the block raises. Returns the value of the block.
  #
  # Raises a RuntimeError when no block is given.
  #
  # rubocop:disable Lint/UnusedMethodArgument
  def self.generate(filename, options = {}, &block)
    raise "SmarterCSV.generate requires a block" unless block_given?

    writer = Writer.new(filename, options)
    # The ensure only covers the part where a writer exists; otherwise a missing
    # block or a failing Writer.new would be masked by a NoMethodError on nil.
    begin
      yield writer
    ensure
      writer.finalize
    end
  end
  # rubocop:enable Lint/UnusedMethodArgument
end