smarter_csv 1.1.5 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +5 -5
  2. data/.rspec +1 -2
  3. data/.rubocop.yml +154 -0
  4. data/CHANGELOG.md +364 -0
  5. data/CONTRIBUTORS.md +56 -0
  6. data/Gemfile +7 -2
  7. data/LICENSE.txt +21 -0
  8. data/README.md +44 -441
  9. data/Rakefile +39 -19
  10. data/TO_DO_v2.md +14 -0
  11. data/docs/_introduction.md +56 -0
  12. data/docs/basic_api.md +157 -0
  13. data/docs/batch_processing.md +68 -0
  14. data/docs/data_transformations.md +50 -0
  15. data/docs/examples.md +75 -0
  16. data/docs/header_transformations.md +113 -0
  17. data/docs/header_validations.md +36 -0
  18. data/docs/options.md +98 -0
  19. data/docs/row_col_sep.md +104 -0
  20. data/docs/value_converters.md +68 -0
  21. data/ext/smarter_csv/extconf.rb +14 -0
  22. data/ext/smarter_csv/smarter_csv.c +97 -0
  23. data/lib/smarter_csv/auto_detection.rb +78 -0
  24. data/lib/smarter_csv/errors.rb +16 -0
  25. data/lib/smarter_csv/file_io.rb +50 -0
  26. data/lib/smarter_csv/hash_transformations.rb +91 -0
  27. data/lib/smarter_csv/header_transformations.rb +63 -0
  28. data/lib/smarter_csv/header_validations.rb +34 -0
  29. data/lib/smarter_csv/headers.rb +68 -0
  30. data/lib/smarter_csv/options.rb +95 -0
  31. data/lib/smarter_csv/parser.rb +90 -0
  32. data/lib/smarter_csv/reader.rb +243 -0
  33. data/lib/smarter_csv/version.rb +3 -1
  34. data/lib/smarter_csv/writer.rb +116 -0
  35. data/lib/smarter_csv.rb +91 -3
  36. data/smarter_csv.gemspec +43 -20
  37. metadata +122 -137
  38. data/.gitignore +0 -8
  39. data/.travis.yml +0 -19
  40. data/lib/extensions/hash.rb +0 -7
  41. data/lib/smarter_csv/smarter_csv.rb +0 -281
  42. data/spec/fixtures/basic.csv +0 -8
  43. data/spec/fixtures/binary.csv +0 -1
  44. data/spec/fixtures/carriage_returns_n.csv +0 -18
  45. data/spec/fixtures/carriage_returns_quoted.csv +0 -3
  46. data/spec/fixtures/carriage_returns_r.csv +0 -1
  47. data/spec/fixtures/carriage_returns_rn.csv +0 -18
  48. data/spec/fixtures/chunk_cornercase.csv +0 -10
  49. data/spec/fixtures/empty.csv +0 -5
  50. data/spec/fixtures/line_endings_n.csv +0 -4
  51. data/spec/fixtures/line_endings_r.csv +0 -1
  52. data/spec/fixtures/line_endings_rn.csv +0 -4
  53. data/spec/fixtures/lots_of_columns.csv +0 -2
  54. data/spec/fixtures/malformed.csv +0 -3
  55. data/spec/fixtures/malformed_header.csv +0 -3
  56. data/spec/fixtures/money.csv +0 -3
  57. data/spec/fixtures/no_header.csv +0 -7
  58. data/spec/fixtures/numeric.csv +0 -5
  59. data/spec/fixtures/pets.csv +0 -5
  60. data/spec/fixtures/quoted.csv +0 -5
  61. data/spec/fixtures/separator.csv +0 -4
  62. data/spec/fixtures/skip_lines.csv +0 -8
  63. data/spec/fixtures/valid_unicode.csv +0 -5
  64. data/spec/fixtures/with_dashes.csv +0 -8
  65. data/spec/fixtures/with_dates.csv +0 -4
  66. data/spec/smarter_csv/binary_file2_spec.rb +0 -24
  67. data/spec/smarter_csv/binary_file_spec.rb +0 -22
  68. data/spec/smarter_csv/carriage_return_spec.rb +0 -170
  69. data/spec/smarter_csv/chunked_reading_spec.rb +0 -14
  70. data/spec/smarter_csv/close_file_spec.rb +0 -15
  71. data/spec/smarter_csv/column_separator_spec.rb +0 -11
  72. data/spec/smarter_csv/convert_values_to_numeric_spec.rb +0 -48
  73. data/spec/smarter_csv/extenstions_spec.rb +0 -17
  74. data/spec/smarter_csv/header_transformation_spec.rb +0 -21
  75. data/spec/smarter_csv/keep_headers_spec.rb +0 -24
  76. data/spec/smarter_csv/key_mapping_spec.rb +0 -25
  77. data/spec/smarter_csv/line_ending_spec.rb +0 -43
  78. data/spec/smarter_csv/load_basic_spec.rb +0 -20
  79. data/spec/smarter_csv/malformed_spec.rb +0 -21
  80. data/spec/smarter_csv/no_header_spec.rb +0 -24
  81. data/spec/smarter_csv/not_downcase_header_spec.rb +0 -24
  82. data/spec/smarter_csv/quoted_spec.rb +0 -23
  83. data/spec/smarter_csv/remove_empty_values_spec.rb +0 -13
  84. data/spec/smarter_csv/remove_keys_from_hashes_spec.rb +0 -25
  85. data/spec/smarter_csv/remove_not_mapped_keys_spec.rb +0 -35
  86. data/spec/smarter_csv/remove_values_matching_spec.rb +0 -26
  87. data/spec/smarter_csv/remove_zero_values_spec.rb +0 -25
  88. data/spec/smarter_csv/skip_lines_spec.rb +0 -29
  89. data/spec/smarter_csv/strings_as_keys_spec.rb +0 -24
  90. data/spec/smarter_csv/strip_chars_from_headers_spec.rb +0 -24
  91. data/spec/smarter_csv/valid_unicode_spec.rb +0 -94
  92. data/spec/smarter_csv/value_converters_spec.rb +0 -52
  93. data/spec/spec/spec_helper.rb +0 -17
  94. data/spec/spec.opts +0 -2
  95. data/spec/spec_helper.rb +0 -21
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 042aadb2bc5426a07a64f09e781bccbd728e8052
4
- data.tar.gz: ba48c2e303591d4027e05d1208c225381d362857
2
+ SHA256:
3
+ metadata.gz: 05aa9e7d2d22ec6e1beb3790e2b727cd3e615cadcd537716f2dfbb190cc87a09
4
+ data.tar.gz: e37b072c7c81a3b6cdc6192ed2bfab046c924f3aa7a8a3e2a66f55fafa25b7ff
5
5
  SHA512:
6
- metadata.gz: 58cb92edabb46bdcb48598d4b4b02b5f0f09cc63378e818ac672daf8d722b5fbf1b246df5db262dff306e87943a2bb2bebbb753944adc5449b19cd5a1475c00b
7
- data.tar.gz: 31fe30f2b2027274a5252c55b234b120327be6ce652f7ad71232bd8a920e33d30cbae42577fff398d0a61574dc17b3016cd3fa1d520eec3dd4636569cc62860e
6
+ metadata.gz: 07c149aaa123ef75fb65fd596fbab64359e24cf2b8606fe406d714358a1c14696fa9ecb420e6dd0a95d40f6af6d41e4988b16df9eac4346d9e1295e3c32f22b1
7
+ data.tar.gz: 71341c1cf1092fabbfe9106ce533adb872e2bc1b0c30fbc032f3ceaea1832e2ddef5d4156f1465658a67dddaae508cd23b12cfe9fdf34edea3f1f3ede0385688
data/.rspec CHANGED
@@ -1,2 +1 @@
1
- --color
2
- --format documentation
1
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,154 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.5 # purposely an old Ruby version
3
+
4
+ Layout/SpaceBeforeBlockBraces:
5
+ Enabled: false
6
+
7
+ Layout/SpaceInsideBlockBraces:
8
+ Enabled: false
9
+
10
+ Layout/SpaceInsideHashLiteralBraces:
11
+ Enabled: false
12
+
13
+ Layout/SpaceAroundOperators:
14
+ Enabled: false
15
+
16
+ Metrics/AbcSize:
17
+ Enabled: false
18
+
19
+ Metrics/BlockLength:
20
+ Enabled: false
21
+
22
+ Metrics/BlockNesting:
23
+ Enabled: false
24
+
25
+ Metrics/ClassLength:
26
+ Enabled: false
27
+
28
+ Metrics/CyclomaticComplexity:
29
+ Enabled: false
30
+
31
+ Metrics/MethodLength:
32
+ Enabled: false
33
+
34
+ Metrics/ModuleLength:
35
+ Enabled: false
36
+
37
+ Metrics/PerceivedComplexity:
38
+ Enabled: false
39
+
40
+ Naming/PredicateName:
41
+ Enabled: false
42
+
43
+ Naming/VariableName:
44
+ Enabled: false
45
+
46
+ Naming/VariableNumber:
47
+ Enabled: false
48
+
49
+ Style/AccessorGrouping: # not needed
50
+ Enabled: false
51
+
52
+ Style/ClassEqualityComparison:
53
+ Enabled: false
54
+
55
+ Style/ClassMethods:
56
+ Enabled: false
57
+
58
+ Style/ConditionalAssignment:
59
+ Enabled: false
60
+
61
+ Style/CommentedKeyword:
62
+ Enabled: false
63
+
64
+ Style/Documentation:
65
+ Enabled: false
66
+
67
+ Style/DoubleNegation: # that's how to make true boolean results - this rule is nonsense
68
+ Enabled: false
69
+
70
+ Style/EmptyElse:
71
+ Enabled: false
72
+
73
+ Style/Encoding:
74
+ Enabled: false
75
+
76
+ Style/EvalWithLocation:
77
+ Enabled: false
78
+
79
+ Style/FormatString:
80
+ Enabled: false
81
+
82
+ Style/FormatStringToken:
83
+ Enabled: false
84
+
85
+ Style/GuardClause:
86
+ Enabled: false
87
+
88
+ Style/IfInsideElse:
89
+ Enabled: false
90
+
91
+ Style/IfUnlessModifier:
92
+ Enabled: false
93
+
94
+ Style/InverseMethods:
95
+ Enabled: false
96
+
97
+ Style/NestedTernaryOperator:
98
+ Enabled: false
99
+
100
+ Style/PreferredHashMethods:
101
+ Enabled: false
102
+
103
+ Style/NumericPredicate:
104
+ Enabled: false
105
+
106
+ Style/PercentLiteralDelimiters:
107
+ Enabled: false
108
+
109
+ Style/RegexpLiteral:
110
+ Enabled: false
111
+
112
+ Style/SafeNavigation:
113
+ Enabled: false
114
+
115
+ Style/SlicingWithRange:
116
+ Enabled: false
117
+
118
+ Style/SpecialGlobalVars: # DANGER: unsafe rule!!
119
+ Enabled: false
120
+
121
+ Style/StringConcatenation:
122
+ Enabled: false
123
+
124
+ Style/StringLiterals:
125
+ Enabled: false
126
+ EnforcedStyle: double_quotes
127
+
128
+ Style/StringLiteralsInInterpolation:
129
+ Enabled: false
130
+ EnforcedStyle: double_quotes
131
+
132
+ Style/SymbolArray:
133
+ Enabled: false
134
+
135
+ Style/SymbolProc: # old Ruby versions can't do this
136
+ Enabled: false
137
+
138
+ Style/TrailingCommaInHashLiteral:
139
+ Enabled: false
140
+
141
+ Style/TrailingUnderscoreVariable:
142
+ Enabled: false
143
+
144
+ Style/TrivialAccessors:
145
+ Enabled: false
146
+
147
+ # Style/UnlessModifier:
148
+ # Enabled: false
149
+
150
+ Style/ZeroLengthPredicate:
151
+ Enabled: false
152
+
153
+ Layout/LineLength:
154
+ Max: 256
data/CHANGELOG.md ADDED
@@ -0,0 +1,364 @@
1
+
2
+ # SmarterCSV 1.x Change Log
3
+
4
+ ## 1.12.1 (2024-07-10)
5
+ * Improved column separator detection by ignoring quoted sections [#276](https://github.com/tilo/smarter_csv/pull/276) (thanks to Nicolas Castellanos)
6
+
7
+ ## 1.12.0 (2024-07-09)
8
+ * Added Thread-Safety: added SmarterCSV::Reader to process CSV files in a thread-safe manner ([issue #277](https://github.com/tilo/smarter_csv/pull/277))
9
+ * SmarterCSV::Writer changed default row separator to the system's row separator (`\n` on Linux, `\r\n` on Windows)
10
+ * added a doc tree
11
+
12
+ * POTENTIAL ISSUE:
13
+
14
+ Version 1.12.x has a change of the underlying implementation of `SmarterCSV.process(file_or_input, options, &block)`.
15
+ Underneath it now uses this interface:
16
+ ```
17
+ reader = SmarterCSV::Reader.new(file_or_input, options)
18
+
19
+ # either simple one-liner:
20
+ data = reader.process
21
+
22
+ # or block format:
23
+ data = reader.process do
24
+ # do something here
25
+ end
26
+ ```
27
+ It still supports calling `SmarterCSV.process` for backwards-compatibility, but it no longer provides access to the internal state, e.g. raw_headers.
28
+
29
+ `SmarterCSV.raw_headers` -> `reader.raw_headers`
30
+ `SmarterCSV.headers` -> `reader.headers`
31
+
32
+ If you need these features, please update your code to create an instance of `SmarterCSV::Reader` as shown above.
33
+
34
+
35
+ ## 1.11.2 (2024-07-06)
36
+ * fixing missing errors definition
37
+
38
+ ## 1.11.1 (2024-07-05) (YANKED)
39
+ * improved behavior of Writer class
40
+ * added SmarterCSV.generate shortcut for CSV writing
41
+
42
+ ## 1.11.0 (2024-07-02)
43
+ * added SmarterCSV::Writer to output CSV files ([issue #44](https://github.com/tilo/smarter_csv/issues/44))
44
+
45
+ ## 1.10.3 (2024-03-10)
46
+ * fixed issue when frozen options are handed in (thanks to Daniel Pepper)
47
+ * cleaned-up rspec tests (thanks to Daniel Pepper)
48
+ * fixed link in README (issue #251)
49
+
50
+ ## 1.10.2 (2024-02-11)
51
+ * improve error message for missing keys
52
+
53
+ ## 1.10.1 (2024-01-07)
54
+ * fix incorrect warning about UTF-8 (issue #268, thanks hirowatari)
55
+
56
+ ## 1.10.0 (2023-12-31) ⚡ BREAKING ⚡
57
+
58
+ * BREAKING CHANGES:
59
+
60
+ Changed behavior:
61
+ + when `user_provided_headers` are provided:
62
+ * if they are not unique, an exception will now be raised
63
+ * they are taken "as is", no header transformations can be applied
64
+ * when they are given as strings or as symbols, it is assumed that this is the desired format
65
+ * the value of the `strings_as_keys` options will be ignored
66
+
67
+ + option `duplicate_header_suffix` now defaults to `''` instead of `nil`.
68
+ * this allows automatic disambiguation when processing CSV files with duplicate headers, by appending a number
69
+ * explicitly set this option to `nil` to get the behavior from previous versions.
70
+
71
+ * performance and memory improvements
72
+ * code refactor
73
+
74
+ ## 1.9.3 (2023-12-16)
75
+ * raise SmarterCSV::IncorrectOption when `user_provided_headers` are empty
76
+ * code refactor / no functional changes
77
+ * added test cases
78
+
79
+ ## 1.9.2 (2023-11-12)
80
+ * fixed bug with '\\' at end of line (issue #252, thanks to averycrespi-moz)
81
+ * fixed require statements (issue #249, thanks to PikachuEXE, courtsimas)
82
+
83
+ ## 1.9.1 (2023-10-30) (YANKED)
84
+ * yanked
85
+ * no functional changes
86
+ * refactored directory structure
87
+ * re-added JRuby and TruffleRuby to CI tests
88
+ * no C-acceleration for JRuby
89
+ * refactored options parsing
90
+ * code coverage / rubocop
91
+
92
+ ## 1.9.0 (2023-09-04)
93
+ * fixed issue #139
94
+
95
+ * Error `SmarterCSV::MissingHeaders` was renamed to `SmarterCSV::MissingKeys`
96
+
97
+ * CHANGED BEHAVIOR:
98
+ When `key_mapping` option is used. (issue #139)
99
+ Previous versions just printed an error message when a CSV header was missing during key mapping.
100
+ Versions >= 1.9 will throw `SmarterCSV::MissingHeaders` listing all headers that were missing during mapping.
101
+
102
+ * Notable details for `key_mapping` and `required_headers`:
103
+
104
+ * `key_mapping` is applied to the headers early on during `SmarterCSV.process`, and raises an error if a header in the input CSV file is missing, and we can not map that header to its desired name.
105
+
106
+ Mapping errors can be suppressed by using:
107
+ * `silence_missing_keys` set to `true`, which silences all such errors, making all headers for mapping optional.
108
+ * `silence_missing_keys` given an Array with the specific header keys that are optional
109
+ The use case is that some header fields are optional, but we still want them renamed if they are present.
110
+
111
+ * `required_headers` checks which headers are present **after** `key_mapping` was applied.
112
+
113
+ ## 1.8.5 (2023-06-25)
114
+ * fix parsing of escaped quote characters (thanks to JP Camara)
115
+
116
+ ## 1.8.4 (2023-04-01)
117
+ * fix gem loading issue (issue #232, #234)
118
+
119
+ ## 1.8.3 (2023-03-30)
120
+ * bugfix: windows one-column files were raising NoColSepDetected (issue #229)
121
+
122
+
123
+ ## 1.8.2 (2023-03-21)
124
+ * bugfix: do not raise `NoColSepDetected` for CSV files with only one column in most cases (issue #222)
125
+ If the first lines contain non-ASCII characters, and no col_sep is detected, it will still raise `NoColSepDetected`
126
+
127
+ ## 1.8.1 (2023-03-19)
128
+ * added validation against invalid values for :col_sep, :row_sep, :quote_char (issue #216)
129
+ * deprecating `required_headers` and replace with `required_keys` (issue #140)
130
+ * fixed issue with require statement
131
+
132
+ ## 1.8.0 (2023-03-18) BREAKING
133
+ * NEW DEFAULTS: `col_sep: :auto`, `row_sep: :auto`. Fully automatic detection by default.
134
+
135
+ MAKE SURE to rescue `NoColSepDetected` if your CSV files can have unexpected formats,
136
+ e.g. from users uploading them to a service, and handle those cases.
137
+
138
+ * ignore Byte Order Marker (BOM) in first line in file (issues #27, #219)
139
+
140
+ ## 1.7.4 (2023-01-13)
141
+ * improved guessing of the column separator, thanks to Alessandro Fazzi
142
+
143
+ ## 1.7.3 (2022-12-05)
144
+ * new option :silence_missing_keys; if set to true, it ignores missing keys in `key_mapping`
145
+
146
+ ## 1.7.2 (2022-08-29)
147
+ * new option :with_line_numbers; if set to true, it adds :csv_line_number to each data hash (issue #130)
148
+
149
+ ## 1.7.1 (2022-07-31)
150
+ * bugfix for issue #195 #197 #200 which only appeared when called from Rails (thanks to Viacheslav Markin, Nicolas Rodriguez)
151
+
152
+ ## 1.7.0 (2022-06-26) (replaced by 1.7.1)
153
+ * added native code to accelerate line parsing by >10x over 1.6.0
154
+ * added option `acceleration`, defaulting to `true`, to enable native code.
155
+ Disable this option to use the ruby code for line parsing.
156
+ * increased test coverage to 100%
157
+ * rubocop changes
158
+
159
+ ## 1.7.0.pre5 (2022-06-20)
160
+ * fixed compiling
161
+ * rubocop changes
162
+ * published pre-release
163
+
164
+ ## 1.7.0.pre1 (2022-05-23)
165
+ * added native code to accelerate line parsing by >10x over 1.6.0
166
+ * added option `acceleration`, defaulting to `true`, to enable native code.
167
+ Disable this option to use the ruby code for line parsing.
168
+ * increased test coverage to 100%
169
+
170
+ ## 1.6.1 (2022-05-06)
171
+ * unused keys in `key_mapping` now generate a warning, no longer raise an exception
172
+ This is preferable when `key_mapping` is done defensively for variabilities in the CSV files.
173
+
174
+ ## 1.6.0 (2022-05-03)
175
+ * completely rewrote line parser
176
+ * added methods `SmarterCSV.raw_headers` and `SmarterCSV.headers` to allow easy examination of how the headers are processed.
177
+
178
+ ## 1.5.2 (2022-04-29)
179
+ * added missing keys to the SmarterCSV::KeyMappingError exception message #189 (thanks to John Dell)
180
+
181
+ ## 1.5.1 (2022-04-27)
182
+ * added raising of `KeyMappingError` if `key_mapping` refers to a non-existent key
183
+ * added option `duplicate_header_suffix` (thanks to Skye Shaw)
184
+ When given a non-nil string, it uses the suffix to append numbering 2..n to duplicate headers.
185
+ If your code will need to process arbitrary CSV files, please set `duplicate_header_suffix`.
186
+
187
+ ## 1.5.0 (2022-04-25)
188
+ * fixed bug with trailing col_sep characters, introduced in 1.4.0
189
+ * Fix deprecation warning in Ruby 3.0.3 / $INPUT_RECORD_SEPARATOR (thanks to Joel Fouse )
190
+
191
+ * changed default for `comment_regexp` to be `nil` for a safer default behavior (thanks to David Lazar)
192
+ **Note**
193
+ This no longer assumes that lines starting with `#` are comments.
194
+ If you want to treat lines starting with '#' as comments, use `comment_regexp: /\A#/`
195
+
196
+ ## 1.4.2 (2022-02-12)
197
+ * fixed issue with simplecov
198
+
199
+ ## 1.4.1 (2022-02-12) (PULLED)
200
+ * minor fix: also support `col_sep: :auto`
201
+ * added simplecov
202
+
203
+ ## 1.4.0 (2022-02-11)
204
+ * dropped GPL license, smarter_csv is now only using the MIT License
205
+ * added experimental option `col_sep: 'auto'` to auto-detect the column separator (issue #183)
206
+ The default behavior is still to assume `,` is the column separator.
207
+ * fixed buggy behavior when using `remove_empty_values: false` (issue #168)
208
+ * fixed Ruby 3.0 deprecation
209
+
210
+ ## 1.3.0 (2022-02-06) Breaking code change if you used `--key_mappings`
211
+ * fix bug for key_mappings (issue #181)
212
+ The values of the `key_mappings` hash will now be used "as is", and no longer forced to be symbols
213
+
214
+ **Users with existing code with `--key_mappings` need to change their code** to
215
+ * either use symbols in the `key_mapping` hash
216
+ * or change the expected keys from symbols to strings
217
+
218
+ ## 1.2.9 (2021-11-22) (PULLED)
219
+ * fix bug for key_mappings (issue #181)
220
+ The values of the `key_mappings` hash will now be used "as is", and no longer forced to be symbols
221
+
222
+ ## 1.2.8 (2020-02-04)
223
+ * fix deprecation warnings on Ruby 2.7 (thanks to Diego Salido)
224
+
225
+ ## 1.2.7 (2020-02-03)
226
+
227
+ ## 1.2.6 (2018-11-13)
228
+ * fixing error caused by calling f.close when we do not hand in a file
229
+
230
+ ## 1.2.5 (2018-09-16)
231
+ * fixing issue #136 with comments in CSV files
232
+ * fixing error class hierarchy
233
+
234
+ ## 1.2.4 (2018-08-06)
235
+ * using Rails blank? if it's available
236
+
237
+ ## 1.2.3 (2018-01-27)
238
+ * fixed regression / test
239
+ * fixed quote_char interpolation for headers, but not data (thanks to Colin Petruno)
240
+ * bugfix (thanks to Joshua Smith for reporting)
241
+
242
+ ## 1.2.0 (2018-01-20)
243
+ * add default validation that a header can only appear once
244
+ * add option `required_headers`
245
+
246
+ ## 1.1.5 (2017-11-05)
247
+ * fix issue with invalid byte sequences in header (issue #103, thanks to Dave Myron)
248
+ * fix issue with invalid byte sequences in multi-line data (thanks to Ivan Ushakov)
249
+ * analyze only 500 characters by default when `:row_sep => :auto` is used.
250
+ added option `row_sep_auto_chars` to change the default if necessary. (thanks to Matthieu Paret)
251
+
252
+ ## 1.1.4 (2017-01-16)
253
+ * fixing UTF-8 related bug which was introduced in 1.1.2 (thanks to Tirdad C.)
254
+
255
+ ## 1.1.3 (2016-12-30)
256
+ * added warning when options indicate UTF-8 processing, but input filehandle is not opened with r:UTF-8 option
257
+
258
+ ## 1.1.2 (2016-12-29)
259
+ * added option `invalid_byte_sequence` (thanks to polycarpou)
260
+ * added comments on handling of UTF-8 encoding when opening from File vs. OpenURI (thanks to KevinColemanInc)
261
+
262
+ ## 1.1.1 (2016-11-26)
263
+ * added option to `skip_lines` (thanks to wal)
264
+ * added option to `force_utf8` encoding (thanks to jordangraft)
265
+ * bugfix if no headers in input data (thanks to esBeee)
266
+ * ensure input file is closed (thanks to waldyr)
267
+ * improved verbose output (thanks to benmaher)
268
+ * improved documentation
269
+
270
+ ## 1.1.0 (2015-07-26)
271
+ * added feature :value_converters, which allows parsing of dates, money, and other things (thanks to Raphaël Bleuse, Lucas Camargo de Almeida, Alejandro)
272
+ * added error if :headers_in_file is set to false, and no :user_provided_headers are given (thanks to innhyu)
273
+ * added support to convert dashes to underscore characters in headers (thanks to César Camacho)
274
+ * fixing automatic detection of \r\n line-endings (thanks to feens)
275
+
276
+ ## 1.0.19 (2014-10-29)
277
+ * added option :keep_original_headers to keep CSV-headers as-is (thanks to Benjamin Thouret)
278
+
279
+ ## 1.0.18 (2014-10-27)
280
+ * added support for multi-line fields / csv fields containing CR (thanks to Chris Hilton) (issue #31)
281
+
282
+ ## 1.0.17 (2014-01-13)
283
+ * added option to set :row_sep to :auto , for automatic detection of the row-separator (issue #22)
284
+
285
+ ## 1.0.16 (2014-01-13)
286
+ * :convert_values_to_numeric option can now be qualified with :except or :only (thanks to Hugo Lepetit)
287
+ * removed deprecated `process_csv` method
288
+
289
+ ## 1.0.15 (2013-12-07)
290
+ * new option:
291
+ * :remove_unmapped_keys to completely ignore columns which were not mapped with :key_mapping (thanks to Dave Sanders)
292
+
293
+ ## 1.0.14 (2013-11-01)
294
+ * added GPL-2 and MIT license to GEM spec file; if you need another license contact me
295
+
296
+ ## 1.0.12 (2013-10-15)
297
+ * added RSpec tests
298
+
299
+ ## 1.0.11 (2013-09-28)
300
+ * bugfix : fixed issue #18 - fixing issue with last chunk not being properly returned (thanks to Jordan Running)
301
+ * added RSpec tests
302
+
303
+ ## 1.0.10 (2013-06-26)
304
+ * bugfix : fixed issue #14 - passing options along to CSV.parse (thanks to Marcos Zimmermann)
305
+
306
+ ## 1.0.9 (2013-06-19)
307
+ * bugfix : fixed issue #13 with negative integers and floats not being correctly converted (thanks to Graham Wetzler)
308
+
309
+ ## 1.0.8 (2013-06-01)
310
+
311
+ * bugfix : fixed issue with nil values in inputs with quote-char (thanks to Félix Bellanger)
312
+ * new options:
313
+ * :force_simple_split : to force simple splitting on :col_sep character for non-standard CSV-files. e.g. without properly escaped :quote_char
314
+ * :verbose : print out line number while processing (to track down problems in input files)
315
+
316
+ ## 1.0.7 (2013-05-20)
317
+
318
+ * allowing process to work with objects with a 'readline' method (thanks to taq)
319
+ * added options:
320
+ * :file_encoding : defaults to utf8 (thanks to MrTin, Paxa)
321
+
322
+ ## 1.0.6 (2013-05-19)
323
+
324
+ * bugfix : quoted fields are now correctly parsed
325
+
326
+ ## 1.0.5 (2013-05-08)
327
+
328
+ * bugfix : for :headers_in_file option
329
+
330
+ ## 1.0.4 (2012-08-17)
331
+
332
+ * renamed the following options:
333
+ * :strip_whitepace_from_values => :strip_whitespace - removes leading/trailing whitespace from headers and values
334
+
335
+ ## 1.0.3 (2012-08-16)
336
+
337
+ * added the following options:
338
+ * :strip_whitepace_from_values - removes leading/trailing whitespace from values
339
+
340
+ ## 1.0.2 (2012-08-02)
341
+
342
+ * added more options for dealing with headers:
343
+ * :user_provided_headers ,user provided Array with header strings or symbols, to precisely define what the headers should be, overriding any in-file headers (default: nil)
344
+ * :headers_in_file , if the file contains headers as the first line (default: true)
345
+
346
+ ## 1.0.1 (2012-07-30)
347
+
348
+ * added the following options:
349
+ * :downcase_header
350
+ * :strings_as_keys
351
+ * :remove_zero_values
352
+ * :remove_values_matching
353
+ * :remove_empty_hashes
354
+ * :convert_values_to_numeric
355
+
356
+ * renamed the following options:
357
+ * :remove_empty_fields => :remove_empty_values
358
+
359
+
360
+ ## 1.0.0 (2012-07-29)
361
+
362
+ * renamed `SmarterCSV.process_csv` to `SmarterCSV.process`.
363
+
364
+ ## 1.0.0.pre1 (2012-07-29)
data/CONTRIBUTORS.md ADDED
@@ -0,0 +1,56 @@
1
+ # A Big Thank You to all the Contributors!!
2
+
3
+
4
+ A Big Thank you to everyone who filed issues, sent comments, and who contributed with pull requests:
5
+
6
+ * [Jack 0](https://github.com/xjlin0)
7
+ * [Alejandro](https://github.com/agaviria)
8
+ * [Lucas Camargo de Almeida](https://github.com/lcalmeida)
9
+ * [Raphaël Bleuse](https://github.com/bleuse)
10
+ * [feens](https://github.com/feens)
11
+ * [César Camacho](https://github.com/chanko)
12
+ * [innhyu](https://github.com/innhyu)
13
+ * [Benjamin Thouret](https://github.com/benichu)
14
+ * [Chris Hilton](https://github.com/chrismhilton)
15
+ * [Sean Duckett](http://github.com/sduckett)
16
+ * [Alex Ong](http://github.com/khaong)
17
+ * [Martin Nilsson](http://github.com/MrTin)
18
+ * [Eustáquio Rangel](http://github.com/taq)
19
+ * [Pavel](http://github.com/paxa)
20
+ * [Félix Bellanger](https://github.com/Keeguon)
21
+ * [Graham Wetzler](https://github.com/grahamwetzler)
22
+ * [Marcos G. Zimmermann](https://github.com/marcosgz)
23
+ * [Jordan Running](https://github.com/jrunning)
24
+ * [Dave Sanders](https://github.com/DaveSanders)
25
+ * [Hugo Lepetit](https://github.com/giglemad)
26
+ * [esBeee](https://github.com/esBeee)
27
+ * [Waldyr de Souza](https://github.com/waldyr)
28
+ * [Ben Maher](https://github.com/benmaher)
29
+ * [Wal McConnell](https://github.com/wal)
30
+ * [Jordan Graft](https://github.com/jordangraft)
31
+ * [Michael](https://github.com/polycarpou)
32
+ * [Kevin Coleman](https://github.com/KevinColemanInc)
33
+ * [Tirdad C.](https://github.com/tridadc)
34
+ * [Dave Myron](https://github.com/contentfree)
35
+ * [Ivan Ushakov](https://github.com/IvanUshakov)
36
+ * [Matthieu Paret](https://github.com/mtparet)
37
+ * [Rohit Amarnath](https://github.com/ramarnat)
38
+ * [Joshua Smith](https://github.com/enviable)
39
+ * [Colin Petruno](https://github.com/colinpetruno)
40
+ * [Diego Salido](https://github.com/salidux)
41
+ * [Elie](https://github.com/elieteyssedou)
42
+ * [Chris Wong](https://github.com/lightwave)
43
+ * [Olle Jonsson](https://github.com/olleolleolle)
44
+ * [Nicolas Guillemain](https://github.com/Viiruus)
45
+ * [Sp6](https://github.com/sp6)
46
+ * [Joel Fouse](https://github.com/jfouse)
47
+ * [John Dell](https://github.com/spovich)
48
+ * [Viacheslav Markin](https://github.com/KXEinc)
49
+ * [Nicolas Rodriguez](https://github.com/n-rodriguez)
50
+ * [Hirotaka Mizutani ](https://github.com/hirotaka)
51
+ * [Rahul Chaudhary](https://github.com/rahulch95)
52
+ * [Alessandro Fazzi](https://github.com/pioneerskies)
53
+ * [JP Camara](https://github.com/jpcamara)
54
+ * [Kenton Hirowatari](https://github.com/hirowatari)
55
+ * [Daniel Pepper](https://github.com/dpep)
56
+ * [Nicolas Castellanos](https://github.com/nicastelo)
data/Gemfile CHANGED
@@ -1,12 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in smarter_csv.gemspec
4
6
  gemspec
5
7
 
8
+ gem "rake"
9
+ gem "rake-compiler"
6
10
 
7
- gem "rake", "< 11"
8
11
  gem 'pry'
12
+ gem "rubocop"
9
13
 
10
14
  group :test do
11
- gem "rspec", "~> 2.99"
15
+ gem "rspec"
16
+ gem "simplecov"
12
17
  end
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2012..2022 Tilo Sloboda
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.