smarter_csv 1.9.0 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Gemfile +6 -3
- data/README.md +5 -1
- data/Rakefile +20 -5
- data/ext/smarter_csv/extconf.rb +2 -2
- data/ext/smarter_csv/smarter_csv.c +8 -2
- data/lib/smarter_csv/options_processing.rb +84 -0
- data/lib/smarter_csv/smarter_csv.rb +556 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +26 -610
- data/smarter_csv.gemspec +2 -1
- metadata +6 -3
- /data/lib/{extensions → core_ext}/hash.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3e4032569303bd062a92b3c3f45f5166346808291667dda9ebd91af123f532ef
|
4
|
+
data.tar.gz: 78b73abc411d8ed866feae600b87b72c3c99fd3b00b67c81eac227c17f8d38ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1712951a2ce4f6e8ad93a6e76a105a3a8d4890babacfbb9ae3eead11ac638962d9da3d45421a327049e87c9d54b43c0dca1327f11a13bbd54440d3a7fefc6253
|
7
|
+
data.tar.gz: 3d8b81f04c8eb16a7b2ab9ddf27bdaf2b2bfdd2ee3a8b70765a88f809fc9869500debe950d8ec27e3a6af818e6f1e415d96d078e52784d638f1363619088faa3
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,19 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.9.2 (2023-11-12)
|
5
|
+
* fixed bug with '\\' at end of line (issue #252, thanks to averycrespi-moz)
|
6
|
+
* fixed require statements (issue #249, thanks to PikachuEXE, courtsimas)
|
7
|
+
|
8
|
+
## 1.9.1 (2023-10-30) (YANKED)
|
9
|
+
* yanked
|
10
|
+
* no functional changes
|
11
|
+
* refactored directory structure
|
12
|
+
* re-added JRuby and TruffleRuby to CI tests
|
13
|
+
* no C-acceleration for JRuby
|
14
|
+
* refactored options parsing
|
15
|
+
* code coverage / rubocop
|
16
|
+
|
4
17
|
## 1.9.0 (2023-09-04)
|
5
18
|
* fixed issue #139
|
6
19
|
|
data/Gemfile
CHANGED
@@ -5,10 +5,13 @@ source 'https://rubygems.org'
|
|
5
5
|
# Specify your gem's dependencies in smarter_csv.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem "rake"
|
8
|
+
gem "rake"
|
9
9
|
gem "rake-compiler"
|
10
10
|
|
11
11
|
gem 'pry'
|
12
|
-
|
13
|
-
gem "rspec"
|
14
12
|
gem "rubocop"
|
13
|
+
|
14
|
+
group :test do
|
15
|
+
gem "rspec"
|
16
|
+
gem "simplecov"
|
17
|
+
end
|
data/README.md
CHANGED
@@ -46,7 +46,11 @@ One `smarter_csv` user wrote:
|
|
46
46
|
* able to ignore "columns" in the input (delete columns)
|
47
47
|
* able to eliminate nil or empty fields from the result hashes (default)
|
48
48
|
|
49
|
-
|
49
|
+
#### Assumptions / Limitations
|
50
|
+
* It is assumed that the escape character is `\`, as on UNIX and Windows systems.
|
51
|
+
* It is assumed that quote characters around fields are balanced, e.g. valid: `"field"`, invalid: `"field\"`
|
52
|
+
i.e. an escaped `quote_char` does not denote the end of a field.
|
53
|
+
* This Gem is only for importing CSV files - writing of CSV files is not supported at this time.
|
50
54
|
|
51
55
|
### Why?
|
52
56
|
|
data/Rakefile
CHANGED
@@ -21,11 +21,26 @@ RuboCop::RakeTask.new
|
|
21
21
|
|
22
22
|
require "rake/extensiontask"
|
23
23
|
|
24
|
-
|
24
|
+
if RUBY_ENGINE == 'jruby'
|
25
25
|
|
26
|
-
|
27
|
-
|
26
|
+
task default: %i[spec]
|
27
|
+
|
28
|
+
else
|
29
|
+
task build: :compile
|
30
|
+
|
31
|
+
Rake::ExtensionTask.new("smarter_csv") do |ext|
|
32
|
+
ext.lib_dir = "lib/smarter_csv"
|
33
|
+
ext.ext_dir = "ext/smarter_csv"
|
34
|
+
ext.source_pattern = "*.{c,h}"
|
35
|
+
end
|
36
|
+
|
37
|
+
# task default: %i[clobber compile spec rubocop]
|
38
|
+
task default: %i[clobber compile spec]
|
28
39
|
end
|
29
40
|
|
30
|
-
|
31
|
-
task
|
41
|
+
desc 'Run spec with coverage'
|
42
|
+
task :coverage do
|
43
|
+
ENV['COVERAGE'] = 'true'
|
44
|
+
Rake::Task['spec'].execute
|
45
|
+
`open coverage/index.html`
|
46
|
+
end
|
data/ext/smarter_csv/extconf.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'mkmf'
|
4
|
-
|
5
4
|
require "rbconfig"
|
5
|
+
|
6
6
|
if RbConfig::MAKEFILE_CONFIG["CFLAGS"].include?("-g -O3")
|
7
|
-
fixed_CFLAGS = RbConfig::MAKEFILE_CONFIG["CFLAGS"].sub("-g -O3", "$(cflags)")
|
7
|
+
fixed_CFLAGS = RbConfig::MAKEFILE_CONFIG["CFLAGS"].sub("-g -O3", "-O3 $(cflags)")
|
8
8
|
puts("Fix CFLAGS: #{RbConfig::MAKEFILE_CONFIG["CFLAGS"]} -> #{fixed_CFLAGS}")
|
9
9
|
RbConfig::MAKEFILE_CONFIG["CFLAGS"] = fixed_CFLAGS
|
10
10
|
end
|
data/ext/smarter_csv/smarter_csv.c
CHANGED
@@ -40,6 +40,7 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
40
40
|
long i;
|
41
41
|
|
42
42
|
char prev_char = '\0'; // Store the previous character for comparison against an escape character
|
43
|
+
long backslash_count = 0; // to count consecutive backslash characters
|
43
44
|
|
44
45
|
while (p < endP) {
|
45
46
|
/* does the remaining string start with col_sep ? */
|
@@ -61,8 +62,13 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
61
62
|
startP = p;
|
62
63
|
}
|
63
64
|
} else {
|
64
|
-
if (*p ==
|
65
|
-
|
65
|
+
if (*p == '\\') {
|
66
|
+
backslash_count++;
|
67
|
+
} else {
|
68
|
+
if (*p == *quoteP && (backslash_count % 2 == 0)) {
|
69
|
+
quote_count++;
|
70
|
+
}
|
71
|
+
backslash_count = 0; // no more consecutive backslash characters
|
66
72
|
}
|
67
73
|
p++;
|
68
74
|
}
|
data/lib/smarter_csv/options_processing.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SmarterCSV
|
4
|
+
DEFAULT_OPTIONS = {
|
5
|
+
acceleration: true,
|
6
|
+
auto_row_sep_chars: 500,
|
7
|
+
chunk_size: nil,
|
8
|
+
col_sep: :auto, # was: ',',
|
9
|
+
comment_regexp: nil, # was: /\A#/,
|
10
|
+
convert_values_to_numeric: true,
|
11
|
+
downcase_header: true,
|
12
|
+
duplicate_header_suffix: nil,
|
13
|
+
file_encoding: 'utf-8',
|
14
|
+
force_simple_split: false,
|
15
|
+
force_utf8: false,
|
16
|
+
headers_in_file: true,
|
17
|
+
invalid_byte_sequence: '',
|
18
|
+
keep_original_headers: false,
|
19
|
+
key_mapping: nil,
|
20
|
+
quote_char: '"',
|
21
|
+
remove_empty_hashes: true,
|
22
|
+
remove_empty_values: true,
|
23
|
+
remove_unmapped_keys: false,
|
24
|
+
remove_values_matching: nil,
|
25
|
+
remove_zero_values: false,
|
26
|
+
required_headers: nil,
|
27
|
+
required_keys: nil,
|
28
|
+
row_sep: :auto, # was: $/,
|
29
|
+
silence_missing_keys: false,
|
30
|
+
skip_lines: nil,
|
31
|
+
strings_as_keys: false,
|
32
|
+
strip_chars_from_headers: nil,
|
33
|
+
strip_whitespace: true,
|
34
|
+
user_provided_headers: nil,
|
35
|
+
value_converters: nil,
|
36
|
+
verbose: false,
|
37
|
+
with_line_numbers: false,
|
38
|
+
}.freeze
|
39
|
+
|
40
|
+
class << self
|
41
|
+
# NOTE: this is not called when "parse" methods are tested by themselves
|
42
|
+
def process_options(given_options = {})
|
43
|
+
puts "User provided options:\n#{pp(given_options)}\n" if given_options[:verbose]
|
44
|
+
|
45
|
+
# fix invalid input
|
46
|
+
given_options[:invalid_byte_sequence] = '' if given_options[:invalid_byte_sequence].nil?
|
47
|
+
|
48
|
+
@options = DEFAULT_OPTIONS.dup.merge!(given_options)
|
49
|
+
puts "Computed options:\n#{pp(@options)}\n" if given_options[:verbose]
|
50
|
+
|
51
|
+
validate_options!(@options)
|
52
|
+
@options
|
53
|
+
end
|
54
|
+
|
55
|
+
# NOTE: this is not called when "parse" methods are tested by themselves
|
56
|
+
#
|
57
|
+
# ONLY FOR BACKWARDS-COMPATIBILITY
|
58
|
+
def default_options
|
59
|
+
DEFAULT_OPTIONS
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def validate_options!(options)
|
65
|
+
keys = options.keys
|
66
|
+
errors = []
|
67
|
+
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
68
|
+
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
69
|
+
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
70
|
+
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
71
|
+
end
|
72
|
+
|
73
|
+
def option_valid?(str)
|
74
|
+
return true if str.is_a?(Symbol) && str == :auto
|
75
|
+
return true if str.is_a?(String) && !str.empty?
|
76
|
+
|
77
|
+
false
|
78
|
+
end
|
79
|
+
|
80
|
+
def pp(value)
|
81
|
+
defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|