smarter_csv 1.6.1 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -0
  3. data/CHANGELOG.md +22 -1
  4. data/CONTRIBUTORS.md +3 -0
  5. data/Gemfile +7 -4
  6. data/README.md +8 -6
  7. data/Rakefile +15 -13
  8. data/ext/smarter_csv/extconf.rb +14 -0
  9. data/ext/smarter_csv/smarter_csv.c +86 -0
  10. data/lib/extensions/hash.rb +4 -2
  11. data/lib/smarter_csv/version.rb +3 -1
  12. data/lib/smarter_csv.rb +519 -10
  13. data/smarter_csv.gemspec +22 -7
  14. metadata +54 -176
  15. data/.gitignore +0 -10
  16. data/.rspec +0 -2
  17. data/.travis.yml +0 -27
  18. data/lib/smarter_csv/smarter_csv.rb +0 -461
  19. data/spec/fixtures/additional_separator.csv +0 -6
  20. data/spec/fixtures/basic.csv +0 -8
  21. data/spec/fixtures/binary.csv +0 -1
  22. data/spec/fixtures/carriage_returns_n.csv +0 -18
  23. data/spec/fixtures/carriage_returns_quoted.csv +0 -3
  24. data/spec/fixtures/carriage_returns_r.csv +0 -1
  25. data/spec/fixtures/carriage_returns_rn.csv +0 -18
  26. data/spec/fixtures/chunk_cornercase.csv +0 -10
  27. data/spec/fixtures/duplicate_headers.csv +0 -3
  28. data/spec/fixtures/empty.csv +0 -5
  29. data/spec/fixtures/empty_columns_1.csv +0 -2
  30. data/spec/fixtures/empty_columns_2.csv +0 -2
  31. data/spec/fixtures/hard_sample.csv +0 -2
  32. data/spec/fixtures/ignore_comments.csv +0 -11
  33. data/spec/fixtures/ignore_comments2.csv +0 -3
  34. data/spec/fixtures/key_mapping.csv +0 -2
  35. data/spec/fixtures/line_endings_n.csv +0 -4
  36. data/spec/fixtures/line_endings_r.csv +0 -1
  37. data/spec/fixtures/line_endings_rn.csv +0 -4
  38. data/spec/fixtures/lots_of_columns.csv +0 -2
  39. data/spec/fixtures/malformed.csv +0 -3
  40. data/spec/fixtures/malformed_header.csv +0 -3
  41. data/spec/fixtures/money.csv +0 -3
  42. data/spec/fixtures/no_header.csv +0 -7
  43. data/spec/fixtures/numeric.csv +0 -5
  44. data/spec/fixtures/pets.csv +0 -5
  45. data/spec/fixtures/problematic.csv +0 -8
  46. data/spec/fixtures/quote_char.csv +0 -9
  47. data/spec/fixtures/quoted.csv +0 -5
  48. data/spec/fixtures/quoted2.csv +0 -4
  49. data/spec/fixtures/separator_colon.csv +0 -4
  50. data/spec/fixtures/separator_comma.csv +0 -4
  51. data/spec/fixtures/separator_pipe.csv +0 -4
  52. data/spec/fixtures/separator_semi.csv +0 -4
  53. data/spec/fixtures/separator_tab.csv +0 -4
  54. data/spec/fixtures/skip_lines.csv +0 -8
  55. data/spec/fixtures/trading.csv +0 -3
  56. data/spec/fixtures/user_import.csv +0 -3
  57. data/spec/fixtures/valid_unicode.csv +0 -5
  58. data/spec/fixtures/with_dashes.csv +0 -8
  59. data/spec/fixtures/with_dates.csv +0 -4
  60. data/spec/smarter_csv/additional_separator_spec.rb +0 -45
  61. data/spec/smarter_csv/binary_file2_spec.rb +0 -24
  62. data/spec/smarter_csv/binary_file_spec.rb +0 -22
  63. data/spec/smarter_csv/blank_spec.rb +0 -55
  64. data/spec/smarter_csv/carriage_return_spec.rb +0 -190
  65. data/spec/smarter_csv/chunked_reading_spec.rb +0 -14
  66. data/spec/smarter_csv/close_file_spec.rb +0 -15
  67. data/spec/smarter_csv/column_separator_spec.rb +0 -95
  68. data/spec/smarter_csv/convert_values_to_numeric_spec.rb +0 -48
  69. data/spec/smarter_csv/duplicate_headers_spec.rb +0 -76
  70. data/spec/smarter_csv/empty_columns_spec.rb +0 -74
  71. data/spec/smarter_csv/extenstions_spec.rb +0 -17
  72. data/spec/smarter_csv/hard_sample_spec.rb +0 -24
  73. data/spec/smarter_csv/header_transformation_spec.rb +0 -21
  74. data/spec/smarter_csv/ignore_comments_spec.rb +0 -45
  75. data/spec/smarter_csv/invalid_headers_spec.rb +0 -38
  76. data/spec/smarter_csv/keep_headers_spec.rb +0 -24
  77. data/spec/smarter_csv/key_mapping_spec.rb +0 -56
  78. data/spec/smarter_csv/line_ending_spec.rb +0 -43
  79. data/spec/smarter_csv/load_basic_spec.rb +0 -20
  80. data/spec/smarter_csv/malformed_spec.rb +0 -25
  81. data/spec/smarter_csv/no_header_spec.rb +0 -29
  82. data/spec/smarter_csv/not_downcase_header_spec.rb +0 -24
  83. data/spec/smarter_csv/parse/column_separator_spec.rb +0 -61
  84. data/spec/smarter_csv/parse/old_csv_library_spec.rb +0 -74
  85. data/spec/smarter_csv/parse/rfc4180_and_more_spec.rb +0 -170
  86. data/spec/smarter_csv/problematic.rb +0 -34
  87. data/spec/smarter_csv/quoted_spec.rb +0 -52
  88. data/spec/smarter_csv/remove_empty_values_spec.rb +0 -13
  89. data/spec/smarter_csv/remove_keys_from_hashes_spec.rb +0 -25
  90. data/spec/smarter_csv/remove_not_mapped_keys_spec.rb +0 -35
  91. data/spec/smarter_csv/remove_values_matching_spec.rb +0 -26
  92. data/spec/smarter_csv/remove_zero_values_spec.rb +0 -25
  93. data/spec/smarter_csv/skip_lines_spec.rb +0 -29
  94. data/spec/smarter_csv/strings_as_keys_spec.rb +0 -24
  95. data/spec/smarter_csv/strip_chars_from_headers_spec.rb +0 -24
  96. data/spec/smarter_csv/trading_spec.rb +0 -25
  97. data/spec/smarter_csv/valid_unicode_spec.rb +0 -94
  98. data/spec/smarter_csv/value_converters_spec.rb +0 -52
  99. data/spec/spec/spec_helper.rb +0 -17
  100. data/spec/spec.opts +0 -2
  101. data/spec/spec_helper.rb +0 -21
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9be5f053e15e157d7d28555b4de894d2761d5918203da45f5fc4e6c5adcc2a3f
4
- data.tar.gz: a47394f3d1f985960a64abf1a43ce6ebf9b8217af2c01a0c5f053af8c77c09ae
3
+ metadata.gz: ab42c787dba96369ba0f499294d5421cc2fc9a514b1cff039e5017d19fd2ff4c
4
+ data.tar.gz: f3605e56395d498169c449657945c00c677a75d17e2a7cbcca1a4f4e65aa45f2
5
5
  SHA512:
6
- metadata.gz: f27113af8a5771d89ac5c8783f1f69645c24bb576dafd97de17b0d8db8fff74dc396b42450802f9332c9f8b32a02ee18dabbc5dbc2daa91c75f957a678e99099
7
- data.tar.gz: 5b2c2f3cbfc17b43c030c4c4c261962818bfbb2ce1a0ecd88f394682b75b75a64a2d8b6afcc0e4b99b97d39ef4b645de143cbb5f595e7aa9d661e66b1a53e98f
6
+ metadata.gz: a264574b464219fd53a862be345c008fc0a93c076345d62be4c75935a148f9a469ccbea14fb2f821cedaada1f2b4ec875468ef50b3ae173bcbb76a5079d43406
7
+ data.tar.gz: 978236fa33bf4656eba89d929545c81ec2dc28b179890dd4254fb04cfa9b8646f4040aa2a62d44361dea221714f1cba0d7e4c858d338d30080c4a0e43c003552
data/.rubocop.yml ADDED
@@ -0,0 +1,133 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.5 # purposely an old Ruby version
3
+
4
+ Layout/SpaceBeforeBlockBraces:
5
+ Enabled: false
6
+
7
+ Layout/SpaceInsideBlockBraces:
8
+ Enabled: false
9
+
10
+ Layout/SpaceInsideHashLiteralBraces:
11
+ Enabled: false
12
+
13
+ Layout/SpaceAroundOperators:
14
+ Enabled: false
15
+
16
+ Metrics/AbcSize:
17
+ Enabled: false
18
+
19
+ Metrics/BlockLength:
20
+ Enabled: false
21
+
22
+ Metrics/BlockNesting:
23
+ Enabled: false
24
+
25
+ Metrics/CyclomaticComplexity: # BS rule
26
+ Enabled: false
27
+
28
+ Metrics/MethodLength:
29
+ Enabled: false
30
+
31
+ Metrics/ModuleLength:
32
+ Enabled: false
33
+
34
+ Metrics/PerceivedComplexity: # BS rule
35
+ Enabled: false
36
+
37
+ Naming/PredicateName:
38
+ Enabled: false
39
+
40
+ Naming/VariableName:
41
+ Enabled: false
42
+
43
+ Naming/VariableNumber:
44
+ Enabled: false
45
+
46
+ Style/ClassEqualityComparison:
47
+ Enabled: false
48
+
49
+ Style/ConditionalAssignment:
50
+ Enabled: false
51
+
52
+ Style/CommentedKeyword:
53
+ Enabled: false
54
+
55
+ Style/Documentation:
56
+ Enabled: false
57
+
58
+ Style/DoubleNegation: # that's how to make true boolean results - this rule is nonsense
59
+ Enabled: false
60
+
61
+ Style/EmptyElse:
62
+ Enabled: false
63
+
64
+ Style/Encoding:
65
+ Enabled: false
66
+
67
+ Style/EvalWithLocation:
68
+ Enabled: false
69
+
70
+ Style/FormatString:
71
+ Enabled: false
72
+
73
+ Style/FormatStringToken:
74
+ Enabled: false
75
+
76
+ Style/GuardClause:
77
+ Enabled: false
78
+
79
+ Style/IfInsideElse:
80
+ Enabled: false
81
+
82
+ Style/IfUnlessModifier:
83
+ Enabled: false
84
+
85
+ Style/NestedTernaryOperator:
86
+ Enabled: false
87
+
88
+ Style/PreferredHashMethods:
89
+ Enabled: false
90
+
91
+ Style/NumericPredicate:
92
+ Enabled: false
93
+
94
+ Style/PercentLiteralDelimiters:
95
+ Enabled: false
96
+
97
+ Style/RegexpLiteral:
98
+ Enabled: false
99
+
100
+ Style/SafeNavigation:
101
+ Enabled: false
102
+
103
+ Style/SlicingWithRange:
104
+ Enabled: false
105
+
106
+ Style/SpecialGlobalVars: # DANGER: unsafe rule!!
107
+ Enabled: false
108
+
109
+ Style/StringLiterals:
110
+ Enabled: false
111
+ EnforcedStyle: double_quotes
112
+
113
+ Style/StringLiteralsInInterpolation:
114
+ Enabled: false
115
+ EnforcedStyle: double_quotes
116
+
117
+ Style/SymbolProc: # old Ruby versions can't do this
118
+ Enabled: false
119
+
120
+ Style/TrailingCommaInHashLiteral:
121
+ Enabled: false
122
+
123
+ Style/TrailingUnderscoreVariable:
124
+ Enabled: false
125
+
126
+ # Style/UnlessModifier:
127
+ # Enabled: false
128
+
129
+ Style/ZeroLengthPredicate:
130
+ Enabled: false
131
+
132
+ Layout/LineLength:
133
+ Max: 240
data/CHANGELOG.md CHANGED
@@ -1,8 +1,29 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## 1.7.1 (2022-07-31)
5
+ * bugfix (thanks to Viacheslav Markin, Nicolas Rodriguez)
6
+ ## 1.7.0 (2022-06-26)
7
+ * added native code to accelerate line parsing by >10x over 1.6.0
8
+ * added option `acceleration`, defaulting to `true`, to enable native code.
9
+ Disable this option to use the ruby code for line parsing.
10
+ * increased test coverage to 100%
11
+ * rubocop changes
12
+
13
+ ## 1.7.0.pre5 (2022-06-20)
14
+ * fixed compiling
15
+ * rubocop changes
16
+ * published pre-release
17
+
18
+ ## 1.7.0.pre1 (2022-05-23)
19
+ * added native code to accelerate line parsing by >10x over 1.6.0
20
+ * added option `acceleration`, defaulting to `true`, to enable native code.
21
+ Disable this option to use the ruby code for line parsing.
22
+ * increased test coverage to 100%
23
+
4
24
  ## 1.6.1 (2022-05-06)
5
- * unused keys in `key_mapping` generate a warning, no longer raise an exception
25
+ * unused keys in `key_mapping` now generate a warning, no longer raise an exception
26
+ This is preferable when `key_mapping` is done defensively for variabilities in the CSV files.
6
27
 
7
28
  ## 1.6.0 (2022-05-03)
8
29
  * completely rewrote line parser
data/CONTRIBUTORS.md CHANGED
@@ -45,3 +45,6 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
45
45
  * [Sp6](https://github.com/sp6)
46
46
  * [Joel Fouse](https://github.com/jfouse)
47
47
  * [John Dell](https://github.com/spovich)
48
+ * [Viacheslav Markin](https://github.com/KXEinc)
49
+ * [Nicolas Rodriguez](https://github.com/n-rodriguez)
50
+ * [Hirotaka Mizutani](https://github.com/hirotaka)
data/Gemfile CHANGED
@@ -1,11 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in smarter_csv.gemspec
4
6
  gemspec
5
7
 
6
- gem "rake", "< 11"
8
+ gem "rake" # , "< 11"
9
+ gem "rake-compiler"
10
+
7
11
  gem 'pry'
8
12
 
9
- group :test do
10
- gem "rspec", "~> 2.99"
11
- end
13
+ gem "rspec"
14
+ gem "rubocop"
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
-
1
+ [![codecov](https://codecov.io/gh/tilo/smarter_csv/branch/main/graph/badge.svg?token=1L7OD80182)](https://codecov.io/gh/tilo/smarter_csv)
2
+
2
3
  #### Service Announcement
3
4
 
4
5
  * Work towards SmarterCSV 2.0 is still on its way, with much improved features, and more streamlined options.
@@ -20,10 +21,9 @@
20
21
 
21
22
  #### SmarterCSV 1.x
22
23
 
23
- `smarter_csv` is now 10 years old, and still kicking! 🎉🎉🎉
24
+ `smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with ActiveRecord, parallel processing, or kicking-off batch jobs with Sidekiq.
24
25
 
25
- `smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with Mongoid or ActiveRecord,
26
- and parallel processing with Resque or Sidekiq.
26
+ To create high-quality output, some options are enabled as a default. Please make sure to check the output and tweak the options accordingly.
27
27
 
28
28
  One `smarter_csv` user wrote:
29
29
 
@@ -346,8 +346,10 @@ Or install it yourself as:
346
346
 
347
347
  Please [open an Issue on GitHub](https://github.com/tilo/smarter_csv/issues) if you have feedback, new feature requests, or want to report a bug. Thank you!
348
348
 
349
- * please include a small sample CSV file
350
- * please mention your version of SmarterCSV, Ruby, Rails
349
+ For reporting issues, please:
350
+ * include a small sample CSV file
351
+ * open a pull-request adding a test that demonstrates the issue
352
+ * mention your version of SmarterCSV, Ruby, Rails
351
353
 
352
354
  ## [A Special Thanks to all Contributors!](CONTRIBUTORS.md) 🎉🎉🎉
353
355
 
data/Rakefile CHANGED
@@ -1,19 +1,21 @@
1
- #!/usr/bin/env rake
1
+ # frozen_string_literal: true
2
+
2
3
  require "bundler/gem_tasks"
3
- require 'rubygems'
4
- require 'rake'
5
4
  require 'rspec/core/rake_task'
6
5
 
7
- task :default => :spec
6
+ RSpec::Core::RakeTask.new(:spec)
8
7
 
9
- desc "Run RSpec"
10
- RSpec::Core::RakeTask.new do |t|
11
- # t.verbose = false
12
- end
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
13
11
 
14
- desc 'Run spec with coverage'
15
- task :coverage do
16
- ENV['COVERAGE'] = 'true'
17
- Rake::Task['spec'].execute
18
- `open coverage/index.html`
12
+ require "rake/extensiontask"
13
+
14
+ task build: :compile
15
+
16
+ Rake::ExtensionTask.new("smarter_csv") do |ext|
17
+ ext.ext_dir = "ext/smarter_csv"
19
18
  end
19
+
20
+ # task default: %i[clobber compile spec rubocop]
21
+ task default: %i[clobber compile spec]
@@ -0,0 +1,14 @@
1
# frozen_string_literal: true

# Build configuration script for the smarter_csv native extension.

require 'mkmf'
require "rbconfig"

# If the configured CFLAGS contain the literal fragment "-g -O3", replace that
# fragment with the "$(cflags)" make variable and report the change on stdout.
configured_cflags = RbConfig::MAKEFILE_CONFIG["CFLAGS"]
if configured_cflags.include?("-g -O3")
  patched_cflags = configured_cflags.sub("-g -O3", "$(cflags)")
  puts("Fix CFLAGS: #{configured_cflags} -> #{patched_cflags}")
  RbConfig::MAKEFILE_CONFIG["CFLAGS"] = patched_cflags
end

# Optimisation level handed to mkmf through its CONFIG table.
CONFIG["optflags"] = "-O3"

# Write the Makefile for the extension target smarter_csv/smarter_csv.
create_makefile('smarter_csv/smarter_csv')
@@ -0,0 +1,86 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+ #include <stdio.h>
4
+ #include <stdbool.h>
5
+
6
+ #ifndef bool
7
+ #define bool int
8
+ #define false ((bool)0)
9
+ #define true ((bool)1)
10
+ #endif
11
+
12
+ /*
13
+ max_size: pass nil if no limit is specified
14
+ */
15
+ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quote_char, VALUE max_size) {
16
+ if (RB_TYPE_P(line, T_NIL) == 1) {
17
+ return rb_ary_new();
18
+
19
+ } else if (RB_TYPE_P(line, T_STRING) == 1) {
20
+ rb_encoding *encoding = rb_enc_get(line); /* get the encoding from the input line */
21
+ char *startP = RSTRING_PTR(line); /* may not be null terminated */
22
+ long line_len = RSTRING_LEN(line);
23
+ char *endP = startP + line_len ; /* points behind the string */
24
+ char *p = startP;
25
+
26
+ char *col_sepP = RSTRING_PTR(col_sep);
27
+ long col_sep_len = RSTRING_LEN(col_sep);
28
+
29
+ char *quoteP = RSTRING_PTR(quote_char);
30
+ long quote_len = RSTRING_LEN(quote_char);
31
+ long quote_count = 0;
32
+
33
+ bool col_sep_found = true;
34
+
35
+ VALUE elements = rb_ary_new();
36
+ VALUE field;
37
+ long i;
38
+
39
+ while (p < endP) {
40
+ /* does the remaining string start with col_sep ? */
41
+ col_sep_found = true;
42
+ for(i=0; (i < col_sep_len) && (p+i < endP) ; i++) {
43
+ col_sep_found = col_sep_found && (*(p+i) == *(col_sepP+i));
44
+ }
45
+ /* if col_sep was found and we have even quotes */
46
+ if (col_sep_found && (quote_count % 2 == 0)) {
47
+ /* if max_size != nil && lements.size >= header_size */
48
+ if ((max_size != Qnil) && RARRAY_LEN(elements) >= NUM2INT(max_size)) {
49
+ break;
50
+ } else {
51
+ /* push that field with original encoding onto the results */
52
+ field = rb_enc_str_new(startP, p - startP, encoding);
53
+ rb_ary_push(elements, field);
54
+
55
+ p += col_sep_len;
56
+ startP = p;
57
+ }
58
+ } else {
59
+ if (*p == *quoteP) {
60
+ quote_count += 1;
61
+ }
62
+ p++;
63
+ }
64
+ } /* while */
65
+
66
+ /* check if the last part of the line needs to be processed */
67
+ if ((max_size == Qnil) || RARRAY_LEN(elements) < NUM2INT(max_size)) {
68
+ /* copy the remaining line as a field with original encoding onto the results */
69
+ field = rb_enc_str_new(startP, endP - startP, encoding);
70
+ rb_ary_push(elements, field);
71
+ }
72
+
73
+ return elements;
74
+ }
75
+
76
+ rb_raise(rb_eTypeError, "ERROR in SmarterCSV.parse_line: line has to be a string or nil");
77
+ }
78
+
79
+
80
+ VALUE SmarterCSV = Qnil;
81
+
82
+ void Init_smarter_csv(void) {
83
+ VALUE SmarterCSV = rb_define_module("SmarterCSV");
84
+
85
+ rb_define_module_function(SmarterCSV, "parse_csv_line_c", rb_parse_csv_line, 4);
86
+ }
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # the following extension for class Hash is needed (from Facets of Ruby library):
2
4
 
3
5
  class Hash
4
- def self.zip(keys,values) # from Facets of Ruby library
5
- (keys.zip(values)).to_h
6
+ def self.zip(keys, values) # from Facets of Ruby library
7
+ keys.zip(values).to_h
6
8
  end
7
9
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SmarterCSV
2
- VERSION = "1.6.1"
4
+ VERSION = "1.7.1"
3
5
  end