smarter_csv 1.6.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +133 -0
- data/CHANGELOG.md +28 -0
- data/CONTRIBUTORS.md +3 -0
- data/Gemfile +7 -4
- data/README.md +30 -26
- data/Rakefile +15 -13
- data/ext/smarter_csv/extconf.rb +14 -0
- data/ext/smarter_csv/smarter_csv.c +86 -0
- data/lib/extensions/hash.rb +4 -2
- data/lib/smarter_csv/version.rb +3 -1
- data/lib/smarter_csv.rb +524 -10
- data/smarter_csv.gemspec +22 -7
- metadata +55 -177
- data/.gitignore +0 -10
- data/.rspec +0 -2
- data/.travis.yml +0 -27
- data/lib/smarter_csv/smarter_csv.rb +0 -461
- data/spec/fixtures/additional_separator.csv +0 -6
- data/spec/fixtures/basic.csv +0 -8
- data/spec/fixtures/binary.csv +0 -1
- data/spec/fixtures/carriage_returns_n.csv +0 -18
- data/spec/fixtures/carriage_returns_quoted.csv +0 -3
- data/spec/fixtures/carriage_returns_r.csv +0 -1
- data/spec/fixtures/carriage_returns_rn.csv +0 -18
- data/spec/fixtures/chunk_cornercase.csv +0 -10
- data/spec/fixtures/duplicate_headers.csv +0 -3
- data/spec/fixtures/empty.csv +0 -5
- data/spec/fixtures/empty_columns_1.csv +0 -2
- data/spec/fixtures/empty_columns_2.csv +0 -2
- data/spec/fixtures/hard_sample.csv +0 -2
- data/spec/fixtures/ignore_comments.csv +0 -11
- data/spec/fixtures/ignore_comments2.csv +0 -3
- data/spec/fixtures/key_mapping.csv +0 -2
- data/spec/fixtures/line_endings_n.csv +0 -4
- data/spec/fixtures/line_endings_r.csv +0 -1
- data/spec/fixtures/line_endings_rn.csv +0 -4
- data/spec/fixtures/lots_of_columns.csv +0 -2
- data/spec/fixtures/malformed.csv +0 -3
- data/spec/fixtures/malformed_header.csv +0 -3
- data/spec/fixtures/money.csv +0 -3
- data/spec/fixtures/no_header.csv +0 -7
- data/spec/fixtures/numeric.csv +0 -5
- data/spec/fixtures/pets.csv +0 -5
- data/spec/fixtures/problematic.csv +0 -8
- data/spec/fixtures/quote_char.csv +0 -9
- data/spec/fixtures/quoted.csv +0 -5
- data/spec/fixtures/quoted2.csv +0 -4
- data/spec/fixtures/separator_colon.csv +0 -4
- data/spec/fixtures/separator_comma.csv +0 -4
- data/spec/fixtures/separator_pipe.csv +0 -4
- data/spec/fixtures/separator_semi.csv +0 -4
- data/spec/fixtures/separator_tab.csv +0 -4
- data/spec/fixtures/skip_lines.csv +0 -8
- data/spec/fixtures/trading.csv +0 -3
- data/spec/fixtures/user_import.csv +0 -3
- data/spec/fixtures/valid_unicode.csv +0 -5
- data/spec/fixtures/with_dashes.csv +0 -8
- data/spec/fixtures/with_dates.csv +0 -4
- data/spec/smarter_csv/additional_separator_spec.rb +0 -45
- data/spec/smarter_csv/binary_file2_spec.rb +0 -24
- data/spec/smarter_csv/binary_file_spec.rb +0 -22
- data/spec/smarter_csv/blank_spec.rb +0 -55
- data/spec/smarter_csv/carriage_return_spec.rb +0 -190
- data/spec/smarter_csv/chunked_reading_spec.rb +0 -14
- data/spec/smarter_csv/close_file_spec.rb +0 -15
- data/spec/smarter_csv/column_separator_spec.rb +0 -95
- data/spec/smarter_csv/convert_values_to_numeric_spec.rb +0 -48
- data/spec/smarter_csv/duplicate_headers_spec.rb +0 -76
- data/spec/smarter_csv/empty_columns_spec.rb +0 -74
- data/spec/smarter_csv/extenstions_spec.rb +0 -17
- data/spec/smarter_csv/hard_sample_spec.rb +0 -24
- data/spec/smarter_csv/header_transformation_spec.rb +0 -21
- data/spec/smarter_csv/ignore_comments_spec.rb +0 -45
- data/spec/smarter_csv/invalid_headers_spec.rb +0 -38
- data/spec/smarter_csv/keep_headers_spec.rb +0 -24
- data/spec/smarter_csv/key_mapping_spec.rb +0 -56
- data/spec/smarter_csv/line_ending_spec.rb +0 -43
- data/spec/smarter_csv/load_basic_spec.rb +0 -20
- data/spec/smarter_csv/malformed_spec.rb +0 -25
- data/spec/smarter_csv/no_header_spec.rb +0 -29
- data/spec/smarter_csv/not_downcase_header_spec.rb +0 -24
- data/spec/smarter_csv/parse/column_separator_spec.rb +0 -61
- data/spec/smarter_csv/parse/old_csv_library_spec.rb +0 -74
- data/spec/smarter_csv/parse/rfc4180_and_more_spec.rb +0 -170
- data/spec/smarter_csv/problematic.rb +0 -34
- data/spec/smarter_csv/quoted_spec.rb +0 -52
- data/spec/smarter_csv/remove_empty_values_spec.rb +0 -13
- data/spec/smarter_csv/remove_keys_from_hashes_spec.rb +0 -25
- data/spec/smarter_csv/remove_not_mapped_keys_spec.rb +0 -35
- data/spec/smarter_csv/remove_values_matching_spec.rb +0 -26
- data/spec/smarter_csv/remove_zero_values_spec.rb +0 -25
- data/spec/smarter_csv/skip_lines_spec.rb +0 -29
- data/spec/smarter_csv/strings_as_keys_spec.rb +0 -24
- data/spec/smarter_csv/strip_chars_from_headers_spec.rb +0 -24
- data/spec/smarter_csv/trading_spec.rb +0 -25
- data/spec/smarter_csv/valid_unicode_spec.rb +0 -94
- data/spec/smarter_csv/value_converters_spec.rb +0 -52
- data/spec/spec/spec_helper.rb +0 -17
- data/spec/spec.opts +0 -2
- data/spec/spec_helper.rb +0 -21
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 34a12dae406ef192b3fbac9dd8a4236e18a7a936d4289cc296e49bf3b88fd386
|
|
4
|
+
data.tar.gz: f317413b7467386b1337938b2288763d1a6da279c6823ad3f4653ff82ea90d39
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ebbd40e8c6ea684200c8efedc12174da1a0a99ab9fae8bcb00f3bfdb8dcac479285644de09003b04b073b46f8ea64cbb29686628e9b7986d3baa07b041ee7dbd
|
|
7
|
+
data.tar.gz: 9c9ba18bd64474811bbb3be2b350ab3b25a33dbd3e2cc802d697d04dcefeff2cc24150e87be6f6789eedc045a717ced2590efe6e6b6056a5c0b18095edbd0b38
|
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
AllCops:
|
|
2
|
+
TargetRubyVersion: 2.5 # purposely an old Ruby version
|
|
3
|
+
|
|
4
|
+
Layout/SpaceBeforeBlockBraces:
|
|
5
|
+
Enabled: false
|
|
6
|
+
|
|
7
|
+
Layout/SpaceInsideBlockBraces:
|
|
8
|
+
Enabled: false
|
|
9
|
+
|
|
10
|
+
Layout/SpaceInsideHashLiteralBraces:
|
|
11
|
+
Enabled: false
|
|
12
|
+
|
|
13
|
+
Layout/SpaceAroundOperators:
|
|
14
|
+
Enabled: false
|
|
15
|
+
|
|
16
|
+
Metrics/AbcSize:
|
|
17
|
+
Enabled: false
|
|
18
|
+
|
|
19
|
+
Metrics/BlockLength:
|
|
20
|
+
Enabled: false
|
|
21
|
+
|
|
22
|
+
Metrics/BlockNesting:
|
|
23
|
+
Enabled: false
|
|
24
|
+
|
|
25
|
+
Metrics/CyclomaticComplexity: # BS rule
|
|
26
|
+
Enabled: false
|
|
27
|
+
|
|
28
|
+
Metrics/MethodLength:
|
|
29
|
+
Enabled: false
|
|
30
|
+
|
|
31
|
+
Metrics/ModuleLength:
|
|
32
|
+
Enabled: false
|
|
33
|
+
|
|
34
|
+
Metrics/PerceivedComplexity: # BS rule
|
|
35
|
+
Enabled: false
|
|
36
|
+
|
|
37
|
+
Naming/PredicateName:
|
|
38
|
+
Enabled: false
|
|
39
|
+
|
|
40
|
+
Naming/VariableName:
|
|
41
|
+
Enabled: false
|
|
42
|
+
|
|
43
|
+
Naming/VariableNumber:
|
|
44
|
+
Enabled: false
|
|
45
|
+
|
|
46
|
+
Style/ClassEqualityComparison:
|
|
47
|
+
Enabled: false
|
|
48
|
+
|
|
49
|
+
Style/ConditionalAssignment:
|
|
50
|
+
Enabled: false
|
|
51
|
+
|
|
52
|
+
Style/CommentedKeyword:
|
|
53
|
+
Enabled: false
|
|
54
|
+
|
|
55
|
+
Style/Documentation:
|
|
56
|
+
Enabled: false
|
|
57
|
+
|
|
58
|
+
Style/DoubleNegation: # that's how to make true boolean results - this rule is nonsense
|
|
59
|
+
Enabled: false
|
|
60
|
+
|
|
61
|
+
Style/EmptyElse:
|
|
62
|
+
Enabled: false
|
|
63
|
+
|
|
64
|
+
Style/Encoding:
|
|
65
|
+
Enabled: false
|
|
66
|
+
|
|
67
|
+
Style/EvalWithLocation:
|
|
68
|
+
Enabled: false
|
|
69
|
+
|
|
70
|
+
Style/FormatString:
|
|
71
|
+
Enabled: false
|
|
72
|
+
|
|
73
|
+
Style/FormatStringToken:
|
|
74
|
+
Enabled: false
|
|
75
|
+
|
|
76
|
+
Style/GuardClause:
|
|
77
|
+
Enabled: false
|
|
78
|
+
|
|
79
|
+
Style/IfInsideElse:
|
|
80
|
+
Enabled: false
|
|
81
|
+
|
|
82
|
+
Style/IfUnlessModifier:
|
|
83
|
+
Enabled: false
|
|
84
|
+
|
|
85
|
+
Style/NestedTernaryOperator:
|
|
86
|
+
Enabled: false
|
|
87
|
+
|
|
88
|
+
Style/PreferredHashMethods:
|
|
89
|
+
Enabled: false
|
|
90
|
+
|
|
91
|
+
Style/NumericPredicate:
|
|
92
|
+
Enabled: false
|
|
93
|
+
|
|
94
|
+
Style/PercentLiteralDelimiters:
|
|
95
|
+
Enabled: false
|
|
96
|
+
|
|
97
|
+
Style/RegexpLiteral:
|
|
98
|
+
Enabled: false
|
|
99
|
+
|
|
100
|
+
Style/SafeNavigation:
|
|
101
|
+
Enabled: false
|
|
102
|
+
|
|
103
|
+
Style/SlicingWithRange:
|
|
104
|
+
Enabled: false
|
|
105
|
+
|
|
106
|
+
Style/SpecialGlobalVars: # DANGER: unsafe rule!!
|
|
107
|
+
Enabled: false
|
|
108
|
+
|
|
109
|
+
Style/StringLiterals:
|
|
110
|
+
Enabled: false
|
|
111
|
+
EnforcedStyle: double_quotes
|
|
112
|
+
|
|
113
|
+
Style/StringLiteralsInInterpolation:
|
|
114
|
+
Enabled: false
|
|
115
|
+
EnforcedStyle: double_quotes
|
|
116
|
+
|
|
117
|
+
Style/SymbolProc: # old Ruby versions can't do this
|
|
118
|
+
Enabled: false
|
|
119
|
+
|
|
120
|
+
Style/TrailingCommaInHashLiteral:
|
|
121
|
+
Enabled: false
|
|
122
|
+
|
|
123
|
+
Style/TrailingUnderscoreVariable:
|
|
124
|
+
Enabled: false
|
|
125
|
+
|
|
126
|
+
# Style/UnlessModifier:
|
|
127
|
+
# Enabled: false
|
|
128
|
+
|
|
129
|
+
Style/ZeroLengthPredicate:
|
|
130
|
+
Enabled: false
|
|
131
|
+
|
|
132
|
+
Layout/LineLength:
|
|
133
|
+
Max: 240
|
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,34 @@
|
|
|
1
1
|
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
|
3
3
|
|
|
4
|
+
## 1.7.2 (2022-08-29)
|
|
5
|
+
* new option :with_line_numbers; if set to true, it adds :csv_line_number to each data hash (issue #130)
|
|
6
|
+
|
|
7
|
+
## 1.7.1 (2022-07-31)
|
|
8
|
+
* bugfix for issue #195 #197 #200 which only appeared when called from Rails (thanks to Viacheslav Markin, Nicolas Rodriguez)
|
|
9
|
+
|
|
10
|
+
## 1.7.0 (2022-06-26) (replaced by 1.7.1)
|
|
11
|
+
* added native code to accelerate line parsing by >10x over 1.6.0
|
|
12
|
+
* added option `acceleration`, defaulting to `true`, to enable native code.
|
|
13
|
+
Disable this option to use the ruby code for line parsing.
|
|
14
|
+
* increased test coverage to 100%
|
|
15
|
+
* rubocop changes
|
|
16
|
+
|
|
17
|
+
## 1.7.0.pre5 (2022-06-20)
|
|
18
|
+
* fixed compiling
|
|
19
|
+
* rubocop changes
|
|
20
|
+
* published pre-release
|
|
21
|
+
|
|
22
|
+
## 1.7.0.pre1 (2022-05-23)
|
|
23
|
+
* added native code to accelerate line parsing by >10x over 1.6.0
|
|
24
|
+
* added option `acceleration`, defaulting to `true`, to enable native code.
|
|
25
|
+
Disable this option to use the ruby code for line parsing.
|
|
26
|
+
* increased test coverage to 100%
|
|
27
|
+
|
|
28
|
+
## 1.6.1 (2022-05-06)
|
|
29
|
+
* unused keys in `key_mapping` now generate a warning, no longer raise an exception
|
|
30
|
+
This is preferable when `key_mapping` is done defensively for variabilities in the CSV files.
|
|
31
|
+
|
|
4
32
|
## 1.6.0 (2022-05-03)
|
|
5
33
|
* completely rewrote line parser
|
|
6
34
|
* added methods `SmarterCSV.raw_headers` and `SmarterCSV.headers` to allow easy examination of how the headers are processed.
|
data/CONTRIBUTORS.md
CHANGED
|
@@ -45,3 +45,6 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
|
|
|
45
45
|
* [Sp6](https://github.com/sp6)
|
|
46
46
|
* [Joel Fouse](https://github.com/jfouse)
|
|
47
47
|
* [John Dell](https://github.com/spovich)
|
|
48
|
+
* [Viacheslav Markin](https://github.com/KXEinc)
|
|
49
|
+
* [Nicolas Rodriguez](https://github.com/n-rodriguez)
|
|
50
|
+
* [Hirotaka Mizutani](https://github.com/hirotaka)
|
data/Gemfile
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
source 'https://rubygems.org'
|
|
2
4
|
|
|
3
5
|
# Specify your gem's dependencies in smarter_csv.gemspec
|
|
4
6
|
gemspec
|
|
5
7
|
|
|
6
|
-
gem "rake", "< 11"
|
|
8
|
+
gem "rake" # , "< 11"
|
|
9
|
+
gem "rake-compiler"
|
|
10
|
+
|
|
7
11
|
gem 'pry'
|
|
8
12
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
end
|
|
13
|
+
gem "rspec"
|
|
14
|
+
gem "rubocop"
|
data/README.md
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
|
|
2
|
-
|
|
2
|
+
# SmarterCSV
|
|
3
|
+
|
|
4
|
+
[codecov](https://codecov.io/gh/tilo/smarter_csv) [Gem Version](http://badge.fury.io/rb/smarter_csv)
|
|
5
|
+
|
|
6
|
+
#### Work towards Future Version 2.0
|
|
3
7
|
|
|
4
|
-
* Work towards SmarterCSV 2.0 is still
|
|
5
|
-
Please check the [2.0-develop branch](https://github.com/tilo/smarter_csv/
|
|
8
|
+
* Work towards SmarterCSV 2.0 is still ongoing, with improved features, and more streamlined options, but consider it as experimental at this time.
|
|
9
|
+
Please check the [2.0-develop branch](https://github.com/tilo/smarter_csv/tree/2.0-develop), open any issues and pull requests with mention of tag v2.0.
|
|
6
10
|
|
|
7
11
|
* New versions of SmarterCSV 1.x will soon print a deprecation warning if you set :verbose to true
|
|
8
12
|
See below for list of deprecated options.
|
|
@@ -14,16 +18,11 @@
|
|
|
14
18
|
|
|
15
19
|
---------------
|
|
16
20
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
[](http://travis-ci.com/tilo/smarter_csv) [](http://badge.fury.io/rb/smarter_csv)
|
|
21
|
+
#### SmarterCSV 1.x [Current Version]
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
`smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with ActiveRecord, parallel processing, or kicking-off batch jobs with Sidekiq.
|
|
22
24
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
`smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with Mongoid or ActiveRecord,
|
|
26
|
-
and parallel processing with Resque or Sidekiq.
|
|
25
|
+
To create high-quality output, some options are enabled as a default. Please make sure to check the output and tweak the options accordingly.
|
|
27
26
|
|
|
28
27
|
One `smarter_csv` user wrote:
|
|
29
28
|
|
|
@@ -132,7 +131,21 @@ and how the `process` method returns the number of chunks when called with a blo
|
|
|
132
131
|
|
|
133
132
|
=> returns number of chunks / rows we processed
|
|
134
133
|
```
|
|
135
|
-
|
|
134
|
+
|
|
135
|
+
#### Example 4: Reading a CSV-like File, and Processing it with Sidekiq:
|
|
136
|
+
```ruby
|
|
137
|
+
filename = '/tmp/strange_db_dump' # a file with CTRL-A as col_separator, and with CTRL-B\n as record_separator (hello iTunes!)
|
|
138
|
+
options = {
|
|
139
|
+
:col_sep => "\cA", :row_sep => "\cB\n", :comment_regexp => /^#/,
|
|
140
|
+
:chunk_size => 100 , :key_mapping => {:export_date => nil, :name => :genre}
|
|
141
|
+
}
|
|
142
|
+
n = SmarterCSV.process(filename, options) do |chunk|
|
|
143
|
+
SidekiqWorkerClass.process_async(chunk ) # pass an array of hashes to Sidekiq workers for parallel processing
|
|
144
|
+
end
|
|
145
|
+
=> returns number of chunks
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
#### Example 5: Populate a MongoDB Database in Chunks of 100 records with SmarterCSV:
|
|
136
149
|
```ruby
|
|
137
150
|
# using chunks:
|
|
138
151
|
filename = '/tmp/some.csv'
|
|
@@ -146,18 +159,6 @@ and how the `process` method returns the number of chunks when called with a blo
|
|
|
146
159
|
=> returns number of chunks we processed
|
|
147
160
|
```
|
|
148
161
|
|
|
149
|
-
#### Example 5: Reading a CSV-like File, and Processing it with Resque:
|
|
150
|
-
```ruby
|
|
151
|
-
filename = '/tmp/strange_db_dump' # a file with CRTL-A as col_separator, and with CTRL-B\n as record_separator (hello iTunes!)
|
|
152
|
-
options = {
|
|
153
|
-
:col_sep => "\cA", :row_sep => "\cB\n", :comment_regexp => /^#/,
|
|
154
|
-
:chunk_size => 100 , :key_mapping => {:export_date => nil, :name => :genre}
|
|
155
|
-
}
|
|
156
|
-
n = SmarterCSV.process(filename, options) do |chunk|
|
|
157
|
-
Resque.enque( ResqueWorkerClass, chunk ) # pass chunks of CSV-data to Resque workers for parallel processing
|
|
158
|
-
end
|
|
159
|
-
=> returns number of chunks
|
|
160
|
-
```
|
|
161
162
|
#### Example 6: Using Value Converters
|
|
162
163
|
|
|
163
164
|
NOTE: If you use `key_mappings` and `value_converters`, make sure that the value converters reference the keys based on the final mapped name, not the original name in the CSV file.
|
|
@@ -239,6 +240,7 @@ The options and the block are optional.
|
|
|
239
240
|
| | | You can not combine the :user_provided_headers and :key_mapping options |
|
|
240
241
|
| :remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs or all empty values |
|
|
241
242
|
| :verbose | false | print out line number while processing (to track down problems in input files) |
|
|
243
|
+
| :with_line_numbers | false | add :csv_line_number to each data hash |
|
|
242
244
|
---------------------------------------------------------------------------------------------------------------------------------
|
|
243
245
|
|
|
244
246
|
#### Deprecated 1.x Options: to be replaced in 2.0
|
|
@@ -346,8 +348,10 @@ Or install it yourself as:
|
|
|
346
348
|
|
|
347
349
|
Please [open an Issue on GitHub](https://github.com/tilo/smarter_csv/issues) if you have feedback, new feature requests, or want to report a bug. Thank you!
|
|
348
350
|
|
|
349
|
-
|
|
350
|
-
*
|
|
351
|
+
For reporting issues, please:
|
|
352
|
+
* include a small sample CSV file
|
|
353
|
+
* open a pull-request adding a test that demonstrates the issue
|
|
354
|
+
* mention your version of SmarterCSV, Ruby, Rails
|
|
351
355
|
|
|
352
356
|
## [A Special Thanks to all Contributors!](CONTRIBUTORS.md) 🎉🎉🎉
|
|
353
357
|
|
data/Rakefile
CHANGED
|
@@ -1,19 +1,21 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
require "bundler/gem_tasks"
|
|
3
|
-
require 'rubygems'
|
|
4
|
-
require 'rake'
|
|
5
4
|
require 'rspec/core/rake_task'
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
RSpec::Core::RakeTask.new(:spec)
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
end
|
|
8
|
+
require "rubocop/rake_task"
|
|
9
|
+
|
|
10
|
+
RuboCop::RakeTask.new
|
|
13
11
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
12
|
+
require "rake/extensiontask"
|
|
13
|
+
|
|
14
|
+
task build: :compile
|
|
15
|
+
|
|
16
|
+
Rake::ExtensionTask.new("smarter_csv") do |ext|
|
|
17
|
+
ext.ext_dir = "ext/smarter_csv"
|
|
19
18
|
end
|
|
19
|
+
|
|
20
|
+
# task default: %i[clobber compile spec rubocop]
|
|
21
|
+
task default: %i[clobber compile spec]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'mkmf'
|
|
4
|
+
|
|
5
|
+
require "rbconfig"
|
|
6
|
+
if RbConfig::MAKEFILE_CONFIG["CFLAGS"].include?("-g -O3")
|
|
7
|
+
fixed_CFLAGS = RbConfig::MAKEFILE_CONFIG["CFLAGS"].sub("-g -O3", "$(cflags)")
|
|
8
|
+
puts("Fix CFLAGS: #{RbConfig::MAKEFILE_CONFIG["CFLAGS"]} -> #{fixed_CFLAGS}")
|
|
9
|
+
RbConfig::MAKEFILE_CONFIG["CFLAGS"] = fixed_CFLAGS
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
CONFIG["optflags"] = "-O3"
|
|
13
|
+
|
|
14
|
+
create_makefile('smarter_csv/smarter_csv')
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#include "ruby.h"
|
|
2
|
+
#include "ruby/encoding.h"
|
|
3
|
+
#include <stdio.h>
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
#ifndef bool
|
|
7
|
+
#define bool int
|
|
8
|
+
#define false ((bool)0)
|
|
9
|
+
#define true ((bool)1)
|
|
10
|
+
#endif
|
|
11
|
+
|
|
12
|
+
/*
|
|
13
|
+
max_size: pass nil if no limit is specified
|
|
14
|
+
*/
|
|
15
|
+
static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quote_char, VALUE max_size) {
|
|
16
|
+
if (RB_TYPE_P(line, T_NIL) == 1) {
|
|
17
|
+
return rb_ary_new();
|
|
18
|
+
|
|
19
|
+
} else if (RB_TYPE_P(line, T_STRING) == 1) {
|
|
20
|
+
rb_encoding *encoding = rb_enc_get(line); /* get the encoding from the input line */
|
|
21
|
+
char *startP = RSTRING_PTR(line); /* may not be null terminated */
|
|
22
|
+
long line_len = RSTRING_LEN(line);
|
|
23
|
+
char *endP = startP + line_len ; /* points behind the string */
|
|
24
|
+
char *p = startP;
|
|
25
|
+
|
|
26
|
+
char *col_sepP = RSTRING_PTR(col_sep);
|
|
27
|
+
long col_sep_len = RSTRING_LEN(col_sep);
|
|
28
|
+
|
|
29
|
+
char *quoteP = RSTRING_PTR(quote_char);
|
|
30
|
+
long quote_len = RSTRING_LEN(quote_char);
|
|
31
|
+
long quote_count = 0;
|
|
32
|
+
|
|
33
|
+
bool col_sep_found = true;
|
|
34
|
+
|
|
35
|
+
VALUE elements = rb_ary_new();
|
|
36
|
+
VALUE field;
|
|
37
|
+
long i;
|
|
38
|
+
|
|
39
|
+
while (p < endP) {
|
|
40
|
+
/* does the remaining string start with col_sep ? */
|
|
41
|
+
col_sep_found = true;
|
|
42
|
+
for(i=0; (i < col_sep_len) && (p+i < endP) ; i++) {
|
|
43
|
+
col_sep_found = col_sep_found && (*(p+i) == *(col_sepP+i));
|
|
44
|
+
}
|
|
45
|
+
/* if col_sep was found and we have even quotes */
|
|
46
|
+
if (col_sep_found && (quote_count % 2 == 0)) {
|
|
47
|
+
/* if max_size != nil && elements.size >= max_size */
|
|
48
|
+
if ((max_size != Qnil) && RARRAY_LEN(elements) >= NUM2INT(max_size)) {
|
|
49
|
+
break;
|
|
50
|
+
} else {
|
|
51
|
+
/* push that field with original encoding onto the results */
|
|
52
|
+
field = rb_enc_str_new(startP, p - startP, encoding);
|
|
53
|
+
rb_ary_push(elements, field);
|
|
54
|
+
|
|
55
|
+
p += col_sep_len;
|
|
56
|
+
startP = p;
|
|
57
|
+
}
|
|
58
|
+
} else {
|
|
59
|
+
if (*p == *quoteP) {
|
|
60
|
+
quote_count += 1;
|
|
61
|
+
}
|
|
62
|
+
p++;
|
|
63
|
+
}
|
|
64
|
+
} /* while */
|
|
65
|
+
|
|
66
|
+
/* check if the last part of the line needs to be processed */
|
|
67
|
+
if ((max_size == Qnil) || RARRAY_LEN(elements) < NUM2INT(max_size)) {
|
|
68
|
+
/* copy the remaining line as a field with original encoding onto the results */
|
|
69
|
+
field = rb_enc_str_new(startP, endP - startP, encoding);
|
|
70
|
+
rb_ary_push(elements, field);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
return elements;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
rb_raise(rb_eTypeError, "ERROR in SmarterCSV.parse_line: line has to be a string or nil");
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
VALUE SmarterCSV = Qnil;
|
|
81
|
+
|
|
82
|
+
void Init_smarter_csv(void) {
|
|
83
|
+
VALUE SmarterCSV = rb_define_module("SmarterCSV");
|
|
84
|
+
|
|
85
|
+
rb_define_module_function(SmarterCSV, "parse_csv_line_c", rb_parse_csv_line, 4);
|
|
86
|
+
}
|
data/lib/extensions/hash.rb
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
# the following extension for class Hash is needed (from Facets of Ruby library):
|
|
2
4
|
|
|
3
5
|
class Hash
|
|
4
|
-
def self.zip(keys,values) # from Facets of Ruby library
|
|
5
|
-
|
|
6
|
+
def self.zip(keys, values) # from Facets of Ruby library
|
|
7
|
+
keys.zip(values).to_h
|
|
6
8
|
end
|
|
7
9
|
end
|
data/lib/smarter_csv/version.rb
CHANGED