smarter_csv 1.6.1 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -0
  3. data/CHANGELOG.md +22 -1
  4. data/CONTRIBUTORS.md +3 -0
  5. data/Gemfile +7 -4
  6. data/README.md +8 -6
  7. data/Rakefile +15 -13
  8. data/ext/smarter_csv/extconf.rb +14 -0
  9. data/ext/smarter_csv/smarter_csv.c +86 -0
  10. data/lib/extensions/hash.rb +4 -2
  11. data/lib/smarter_csv/version.rb +3 -1
  12. data/lib/smarter_csv.rb +519 -10
  13. data/smarter_csv.gemspec +22 -7
  14. metadata +54 -176
  15. data/.gitignore +0 -10
  16. data/.rspec +0 -2
  17. data/.travis.yml +0 -27
  18. data/lib/smarter_csv/smarter_csv.rb +0 -461
  19. data/spec/fixtures/additional_separator.csv +0 -6
  20. data/spec/fixtures/basic.csv +0 -8
  21. data/spec/fixtures/binary.csv +0 -1
  22. data/spec/fixtures/carriage_returns_n.csv +0 -18
  23. data/spec/fixtures/carriage_returns_quoted.csv +0 -3
  24. data/spec/fixtures/carriage_returns_r.csv +0 -1
  25. data/spec/fixtures/carriage_returns_rn.csv +0 -18
  26. data/spec/fixtures/chunk_cornercase.csv +0 -10
  27. data/spec/fixtures/duplicate_headers.csv +0 -3
  28. data/spec/fixtures/empty.csv +0 -5
  29. data/spec/fixtures/empty_columns_1.csv +0 -2
  30. data/spec/fixtures/empty_columns_2.csv +0 -2
  31. data/spec/fixtures/hard_sample.csv +0 -2
  32. data/spec/fixtures/ignore_comments.csv +0 -11
  33. data/spec/fixtures/ignore_comments2.csv +0 -3
  34. data/spec/fixtures/key_mapping.csv +0 -2
  35. data/spec/fixtures/line_endings_n.csv +0 -4
  36. data/spec/fixtures/line_endings_r.csv +0 -1
  37. data/spec/fixtures/line_endings_rn.csv +0 -4
  38. data/spec/fixtures/lots_of_columns.csv +0 -2
  39. data/spec/fixtures/malformed.csv +0 -3
  40. data/spec/fixtures/malformed_header.csv +0 -3
  41. data/spec/fixtures/money.csv +0 -3
  42. data/spec/fixtures/no_header.csv +0 -7
  43. data/spec/fixtures/numeric.csv +0 -5
  44. data/spec/fixtures/pets.csv +0 -5
  45. data/spec/fixtures/problematic.csv +0 -8
  46. data/spec/fixtures/quote_char.csv +0 -9
  47. data/spec/fixtures/quoted.csv +0 -5
  48. data/spec/fixtures/quoted2.csv +0 -4
  49. data/spec/fixtures/separator_colon.csv +0 -4
  50. data/spec/fixtures/separator_comma.csv +0 -4
  51. data/spec/fixtures/separator_pipe.csv +0 -4
  52. data/spec/fixtures/separator_semi.csv +0 -4
  53. data/spec/fixtures/separator_tab.csv +0 -4
  54. data/spec/fixtures/skip_lines.csv +0 -8
  55. data/spec/fixtures/trading.csv +0 -3
  56. data/spec/fixtures/user_import.csv +0 -3
  57. data/spec/fixtures/valid_unicode.csv +0 -5
  58. data/spec/fixtures/with_dashes.csv +0 -8
  59. data/spec/fixtures/with_dates.csv +0 -4
  60. data/spec/smarter_csv/additional_separator_spec.rb +0 -45
  61. data/spec/smarter_csv/binary_file2_spec.rb +0 -24
  62. data/spec/smarter_csv/binary_file_spec.rb +0 -22
  63. data/spec/smarter_csv/blank_spec.rb +0 -55
  64. data/spec/smarter_csv/carriage_return_spec.rb +0 -190
  65. data/spec/smarter_csv/chunked_reading_spec.rb +0 -14
  66. data/spec/smarter_csv/close_file_spec.rb +0 -15
  67. data/spec/smarter_csv/column_separator_spec.rb +0 -95
  68. data/spec/smarter_csv/convert_values_to_numeric_spec.rb +0 -48
  69. data/spec/smarter_csv/duplicate_headers_spec.rb +0 -76
  70. data/spec/smarter_csv/empty_columns_spec.rb +0 -74
  71. data/spec/smarter_csv/extenstions_spec.rb +0 -17
  72. data/spec/smarter_csv/hard_sample_spec.rb +0 -24
  73. data/spec/smarter_csv/header_transformation_spec.rb +0 -21
  74. data/spec/smarter_csv/ignore_comments_spec.rb +0 -45
  75. data/spec/smarter_csv/invalid_headers_spec.rb +0 -38
  76. data/spec/smarter_csv/keep_headers_spec.rb +0 -24
  77. data/spec/smarter_csv/key_mapping_spec.rb +0 -56
  78. data/spec/smarter_csv/line_ending_spec.rb +0 -43
  79. data/spec/smarter_csv/load_basic_spec.rb +0 -20
  80. data/spec/smarter_csv/malformed_spec.rb +0 -25
  81. data/spec/smarter_csv/no_header_spec.rb +0 -29
  82. data/spec/smarter_csv/not_downcase_header_spec.rb +0 -24
  83. data/spec/smarter_csv/parse/column_separator_spec.rb +0 -61
  84. data/spec/smarter_csv/parse/old_csv_library_spec.rb +0 -74
  85. data/spec/smarter_csv/parse/rfc4180_and_more_spec.rb +0 -170
  86. data/spec/smarter_csv/problematic.rb +0 -34
  87. data/spec/smarter_csv/quoted_spec.rb +0 -52
  88. data/spec/smarter_csv/remove_empty_values_spec.rb +0 -13
  89. data/spec/smarter_csv/remove_keys_from_hashes_spec.rb +0 -25
  90. data/spec/smarter_csv/remove_not_mapped_keys_spec.rb +0 -35
  91. data/spec/smarter_csv/remove_values_matching_spec.rb +0 -26
  92. data/spec/smarter_csv/remove_zero_values_spec.rb +0 -25
  93. data/spec/smarter_csv/skip_lines_spec.rb +0 -29
  94. data/spec/smarter_csv/strings_as_keys_spec.rb +0 -24
  95. data/spec/smarter_csv/strip_chars_from_headers_spec.rb +0 -24
  96. data/spec/smarter_csv/trading_spec.rb +0 -25
  97. data/spec/smarter_csv/valid_unicode_spec.rb +0 -94
  98. data/spec/smarter_csv/value_converters_spec.rb +0 -52
  99. data/spec/spec/spec_helper.rb +0 -17
  100. data/spec/spec.opts +0 -2
  101. data/spec/spec_helper.rb +0 -21
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9be5f053e15e157d7d28555b4de894d2761d5918203da45f5fc4e6c5adcc2a3f
4
- data.tar.gz: a47394f3d1f985960a64abf1a43ce6ebf9b8217af2c01a0c5f053af8c77c09ae
3
+ metadata.gz: ab42c787dba96369ba0f499294d5421cc2fc9a514b1cff039e5017d19fd2ff4c
4
+ data.tar.gz: f3605e56395d498169c449657945c00c677a75d17e2a7cbcca1a4f4e65aa45f2
5
5
  SHA512:
6
- metadata.gz: f27113af8a5771d89ac5c8783f1f69645c24bb576dafd97de17b0d8db8fff74dc396b42450802f9332c9f8b32a02ee18dabbc5dbc2daa91c75f957a678e99099
7
- data.tar.gz: 5b2c2f3cbfc17b43c030c4c4c261962818bfbb2ce1a0ecd88f394682b75b75a64a2d8b6afcc0e4b99b97d39ef4b645de143cbb5f595e7aa9d661e66b1a53e98f
6
+ metadata.gz: a264574b464219fd53a862be345c008fc0a93c076345d62be4c75935a148f9a469ccbea14fb2f821cedaada1f2b4ec875468ef50b3ae173bcbb76a5079d43406
7
+ data.tar.gz: 978236fa33bf4656eba89d929545c81ec2dc28b179890dd4254fb04cfa9b8646f4040aa2a62d44361dea221714f1cba0d7e4c858d338d30080c4a0e43c003552
data/.rubocop.yml ADDED
@@ -0,0 +1,133 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.5 # purposely an old Ruby version
3
+
4
+ Layout/SpaceBeforeBlockBraces:
5
+ Enabled: false
6
+
7
+ Layout/SpaceInsideBlockBraces:
8
+ Enabled: false
9
+
10
+ Layout/SpaceInsideHashLiteralBraces:
11
+ Enabled: false
12
+
13
+ Layout/SpaceAroundOperators:
14
+ Enabled: false
15
+
16
+ Metrics/AbcSize:
17
+ Enabled: false
18
+
19
+ Metrics/BlockLength:
20
+ Enabled: false
21
+
22
+ Metrics/BlockNesting:
23
+ Enabled: false
24
+
25
+ Metrics/CyclomaticComplexity: # BS rule
26
+ Enabled: false
27
+
28
+ Metrics/MethodLength:
29
+ Enabled: false
30
+
31
+ Metrics/ModuleLength:
32
+ Enabled: false
33
+
34
+ Metrics/PerceivedComplexity: # BS rule
35
+ Enabled: false
36
+
37
+ Naming/PredicateName:
38
+ Enabled: false
39
+
40
+ Naming/VariableName:
41
+ Enabled: false
42
+
43
+ Naming/VariableNumber:
44
+ Enabled: false
45
+
46
+ Style/ClassEqualityComparison:
47
+ Enabled: false
48
+
49
+ Style/ConditionalAssignment:
50
+ Enabled: false
51
+
52
+ Style/CommentedKeyword:
53
+ Enabled: false
54
+
55
+ Style/Documentation:
56
+ Enabled: false
57
+
58
+ Style/DoubleNegation: # that's how to make true boolean results - this rule is nonsense
59
+ Enabled: false
60
+
61
+ Style/EmptyElse:
62
+ Enabled: false
63
+
64
+ Style/Encoding:
65
+ Enabled: false
66
+
67
+ Style/EvalWithLocation:
68
+ Enabled: false
69
+
70
+ Style/FormatString:
71
+ Enabled: false
72
+
73
+ Style/FormatStringToken:
74
+ Enabled: false
75
+
76
+ Style/GuardClause:
77
+ Enabled: false
78
+
79
+ Style/IfInsideElse:
80
+ Enabled: false
81
+
82
+ Style/IfUnlessModifier:
83
+ Enabled: false
84
+
85
+ Style/NestedTernaryOperator:
86
+ Enabled: false
87
+
88
+ Style/PreferredHashMethods:
89
+ Enabled: false
90
+
91
+ Style/NumericPredicate:
92
+ Enabled: false
93
+
94
+ Style/PercentLiteralDelimiters:
95
+ Enabled: false
96
+
97
+ Style/RegexpLiteral:
98
+ Enabled: false
99
+
100
+ Style/SafeNavigation:
101
+ Enabled: false
102
+
103
+ Style/SlicingWithRange:
104
+ Enabled: false
105
+
106
+ Style/SpecialGlobalVars: # DANGER: unsafe rule!!
107
+ Enabled: false
108
+
109
+ Style/StringLiterals:
110
+ Enabled: false
111
+ EnforcedStyle: double_quotes
112
+
113
+ Style/StringLiteralsInInterpolation:
114
+ Enabled: false
115
+ EnforcedStyle: double_quotes
116
+
117
+ Style/SymbolProc: # old Ruby versions can't do this
118
+ Enabled: false
119
+
120
+ Style/TrailingCommaInHashLiteral:
121
+ Enabled: false
122
+
123
+ Style/TrailingUnderscoreVariable:
124
+ Enabled: false
125
+
126
+ # Style/UnlessModifier:
127
+ # Enabled: false
128
+
129
+ Style/ZeroLengthPredicate:
130
+ Enabled: false
131
+
132
+ Layout/LineLength:
133
+ Max: 240
data/CHANGELOG.md CHANGED
@@ -1,8 +1,29 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## 1.7.1 (2022-07-31)
5
+ * bugfix (thanks to Viacheslav Markin, Nicolas Rodriguez)
6
+ ## 1.7.0 (2022-06-26)
7
+ * added native code to accelerate line parsing by >10x over 1.6.0
8
+ * added option `acceleration`, defaulting to `true`, to enable native code.
9
+ Disable this option to use the ruby code for line parsing.
10
+ * increased test coverage to 100%
11
+ * rubocop changes
12
+
13
+ ## 1.7.0.pre5 (2022-06-20)
14
+ * fixed compiling
15
+ * rubocop changes
16
+ * published pre-release
17
+
18
+ ## 1.7.0.pre1 (2022-05-23)
19
+ * added native code to accelerate line parsing by >10x over 1.6.0
20
+ * added option `acceleration`, defaulting to `true`, to enable native code.
21
+ Disable this option to use the ruby code for line parsing.
22
+ * increased test coverage to 100%
23
+
4
24
  ## 1.6.1 (2022-05-06)
5
- * unused keys in `key_mapping` generate a warning, no longer raise an exception
25
+ * unused keys in `key_mapping` now generate a warning, no longer raise an exception
26
+ This is preferable when `key_mapping` is done defensively for variabilities in the CSV files.
6
27
 
7
28
  ## 1.6.0 (2022-05-03)
8
29
  * completely rewrote line parser
data/CONTRIBUTORS.md CHANGED
@@ -45,3 +45,6 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
45
45
  * [Sp6](https://github.com/sp6)
46
46
  * [Joel Fouse](https://github.com/jfouse)
47
47
  * [John Dell](https://github.com/spovich)
48
+ * [Viacheslav Markin](https://github.com/KXEinc)
49
+ * [Nicolas Rodriguez](https://github.com/n-rodriguez)
50
+ * [Hirotaka Mizutani](https://github.com/hirotaka)
data/Gemfile CHANGED
@@ -1,11 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in smarter_csv.gemspec
4
6
  gemspec
5
7
 
6
- gem "rake", "< 11"
8
+ gem "rake" # , "< 11"
9
+ gem "rake-compiler"
10
+
7
11
  gem 'pry'
8
12
 
9
- group :test do
10
- gem "rspec", "~> 2.99"
11
- end
13
+ gem "rspec"
14
+ gem "rubocop"
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
-
1
+ [![codecov](https://codecov.io/gh/tilo/smarter_csv/branch/main/graph/badge.svg?token=1L7OD80182)](https://codecov.io/gh/tilo/smarter_csv)
2
+
2
3
  #### Service Announcement
3
4
 
4
5
  * Work towards SmarterCSV 2.0 is still on its way, with much improved features, and more streamlined options.
@@ -20,10 +21,9 @@
20
21
 
21
22
  #### SmarterCSV 1.x
22
23
 
23
- `smarter_csv` is now 10 years old, and still kicking! 🎉🎉🎉
24
+ `smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with ActiveRecord, parallel processing, or kicking-off batch jobs with Sidekiq.
24
25
 
25
- `smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with Mongoid or ActiveRecord,
26
- and parallel processing with Resque or Sidekiq.
26
+ To create high-quality output, some options are enabled as a default. Please make sure to check the output and tweak the options accordingly.
27
27
 
28
28
  One `smarter_csv` user wrote:
29
29
 
@@ -346,8 +346,10 @@ Or install it yourself as:
346
346
 
347
347
  Please [open an Issue on GitHub](https://github.com/tilo/smarter_csv/issues) if you have feedback, new feature requests, or want to report a bug. Thank you!
348
348
 
349
- * please include a small sample CSV file
350
- * please mention your version of SmarterCSV, Ruby, Rails
349
+ For reporting issues, please:
350
+ * include a small sample CSV file
351
+ * open a pull-request adding a test that demonstrates the issue
352
+ * mention your version of SmarterCSV, Ruby, Rails
351
353
 
352
354
  ## [A Special Thanks to all Contributors!](CONTRIBUTORS.md) 🎉🎉🎉
353
355
 
data/Rakefile CHANGED
@@ -1,19 +1,21 @@
1
- #!/usr/bin/env rake
1
+ # frozen_string_literal: true
2
+
2
3
  require "bundler/gem_tasks"
3
- require 'rubygems'
4
- require 'rake'
5
4
  require 'rspec/core/rake_task'
6
5
 
7
- task :default => :spec
6
+ RSpec::Core::RakeTask.new(:spec)
8
7
 
9
- desc "Run RSpec"
10
- RSpec::Core::RakeTask.new do |t|
11
- # t.verbose = false
12
- end
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
13
11
 
14
- desc 'Run spec with coverage'
15
- task :coverage do
16
- ENV['COVERAGE'] = 'true'
17
- Rake::Task['spec'].execute
18
- `open coverage/index.html`
12
+ require "rake/extensiontask"
13
+
14
+ task build: :compile
15
+
16
+ Rake::ExtensionTask.new("smarter_csv") do |ext|
17
+ ext.ext_dir = "ext/smarter_csv"
19
18
  end
19
+
20
+ # task default: %i[clobber compile spec rubocop]
21
+ task default: %i[clobber compile spec]
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+
5
+ require "rbconfig"
6
+ if RbConfig::MAKEFILE_CONFIG["CFLAGS"].include?("-g -O3")
7
+ fixed_CFLAGS = RbConfig::MAKEFILE_CONFIG["CFLAGS"].sub("-g -O3", "$(cflags)")
8
+ puts("Fix CFLAGS: #{RbConfig::MAKEFILE_CONFIG["CFLAGS"]} -> #{fixed_CFLAGS}")
9
+ RbConfig::MAKEFILE_CONFIG["CFLAGS"] = fixed_CFLAGS
10
+ end
11
+
12
+ CONFIG["optflags"] = "-O3"
13
+
14
+ create_makefile('smarter_csv/smarter_csv')
@@ -0,0 +1,86 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+ #include <stdio.h>
4
+ #include <stdbool.h>
5
+
6
+ #ifndef bool
7
+ #define bool int
8
+ #define false ((bool)0)
9
+ #define true ((bool)1)
10
+ #endif
11
+
12
+ /*
13
+ max_size: pass nil if no limit is specified
14
+ */
15
+ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quote_char, VALUE max_size) {
16
+ if (RB_TYPE_P(line, T_NIL) == 1) {
17
+ return rb_ary_new();
18
+
19
+ } else if (RB_TYPE_P(line, T_STRING) == 1) {
20
+ rb_encoding *encoding = rb_enc_get(line); /* get the encoding from the input line */
21
+ char *startP = RSTRING_PTR(line); /* may not be null terminated */
22
+ long line_len = RSTRING_LEN(line);
23
+ char *endP = startP + line_len ; /* points behind the string */
24
+ char *p = startP;
25
+
26
+ char *col_sepP = RSTRING_PTR(col_sep);
27
+ long col_sep_len = RSTRING_LEN(col_sep);
28
+
29
+ char *quoteP = RSTRING_PTR(quote_char);
30
+ long quote_len = RSTRING_LEN(quote_char);
31
+ long quote_count = 0;
32
+
33
+ bool col_sep_found = true;
34
+
35
+ VALUE elements = rb_ary_new();
36
+ VALUE field;
37
+ long i;
38
+
39
+ while (p < endP) {
40
+ /* does the remaining string start with col_sep ? */
41
+ col_sep_found = true;
42
+ for(i=0; (i < col_sep_len) && (p+i < endP) ; i++) {
43
+ col_sep_found = col_sep_found && (*(p+i) == *(col_sepP+i));
44
+ }
45
+ /* if col_sep was found and we have even quotes */
46
+ if (col_sep_found && (quote_count % 2 == 0)) {
47
+ /* if max_size != nil && lements.size >= header_size */
48
+ if ((max_size != Qnil) && RARRAY_LEN(elements) >= NUM2INT(max_size)) {
49
+ break;
50
+ } else {
51
+ /* push that field with original encoding onto the results */
52
+ field = rb_enc_str_new(startP, p - startP, encoding);
53
+ rb_ary_push(elements, field);
54
+
55
+ p += col_sep_len;
56
+ startP = p;
57
+ }
58
+ } else {
59
+ if (*p == *quoteP) {
60
+ quote_count += 1;
61
+ }
62
+ p++;
63
+ }
64
+ } /* while */
65
+
66
+ /* check if the last part of the line needs to be processed */
67
+ if ((max_size == Qnil) || RARRAY_LEN(elements) < NUM2INT(max_size)) {
68
+ /* copy the remaining line as a field with original encoding onto the results */
69
+ field = rb_enc_str_new(startP, endP - startP, encoding);
70
+ rb_ary_push(elements, field);
71
+ }
72
+
73
+ return elements;
74
+ }
75
+
76
+ rb_raise(rb_eTypeError, "ERROR in SmarterCSV.parse_line: line has to be a string or nil");
77
+ }
78
+
79
+
80
+ VALUE SmarterCSV = Qnil;
81
+
82
+ void Init_smarter_csv(void) {
83
+ VALUE SmarterCSV = rb_define_module("SmarterCSV");
84
+
85
+ rb_define_module_function(SmarterCSV, "parse_csv_line_c", rb_parse_csv_line, 4);
86
+ }
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # the following extension for class Hash is needed (from Facets of Ruby library):
2
4
 
3
5
  class Hash
4
- def self.zip(keys,values) # from Facets of Ruby library
5
- (keys.zip(values)).to_h
6
+ def self.zip(keys, values) # from Facets of Ruby library
7
+ keys.zip(values).to_h
6
8
  end
7
9
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SmarterCSV
2
- VERSION = "1.6.1"
4
+ VERSION = "1.7.1"
3
5
  end