smarter_csv 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +44 -24
- data/lib/smarter_csv/smarter_csv.rb +10 -5
- data/lib/smarter_csv/version.rb +1 -1
- metadata +37 -33
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ed29542688f14930b2c88be160a092ba0e7e0398
|
4
|
+
data.tar.gz: 6ce2d29c27af35540f83a44f7c1af1a1f3da988e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a88bbcfc69a77beb3ef9fbcd68d48d17880e57264f4755426d0f44c64d06905ba496de9805f48cb2c83373da2ba0bb893dd71a55556fed61016b6696a4a35d95
|
7
|
+
data.tar.gz: 73de0af3b952a8dc4a32f1c823162f718aeedd5304f553b4690100a245c81d68edda0280e3646e965e5c3d260176a884ea64f6a43adb3c62ae26e4bcef2bbfc7
|
data/README.md
CHANGED
@@ -126,30 +126,31 @@ The options and the block are optional.
|
|
126
126
|
|
127
127
|
`SmarterCSV.process` supports the following options:
|
128
128
|
|
129
|
-
|
130
|
-
|
131
|
-
:col_sep | ',' | column separator
|
132
|
-
:row_sep | $/ ,"\n" | row separator or record separator , defaults to system's $/ , which defaults to "\n"
|
133
|
-
:quote_char | '"' | quotation character
|
134
|
-
:comment_regexp | /^#/ | regular expression which matches comment lines (see NOTE about the CSV header)
|
135
|
-
:chunk_size | nil | if set, determines the desired chunk-size (defaults to nil, no chunk processing)
|
136
|
-
:key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash
|
137
|
-
:downcase_header | true | downcase all column headers
|
138
|
-
:strings_as_keys | false | use strings instead of symbols as the keys in the result hashes
|
139
|
-
:strip_whitespace | true | remove whitespace before/after values and headers
|
140
|
-
:remove_empty_values | true | remove values which have nil or empty strings as values
|
141
|
-
:remove_zero_values | true | remove values which have a numeric value equal to zero / 0
|
142
|
-
:remove_values_matching | nil | removes key/value pairs if value matches given regular expressions. e.g.:
|
143
|
-
|
144
|
-
:convert_values_to_numeric | true | converts strings containing Integers or Floats to the appropriate class
|
145
|
-
:remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs
|
146
|
-
:user_provided_headers | nil | *careful with that axe!*
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
:
|
151
|
-
|
152
|
-
|
129
|
+
| Option | Default | Explanation |
|
130
|
+
---------------------------------------------------------------------------------------------------------------------------------
|
131
|
+
| :col_sep | ',' | column separator |
|
132
|
+
| :row_sep | $/ ,"\n" | row separator or record separator , defaults to system's $/ , which defaults to "\n" |
|
133
|
+
| :quote_char | '"' | quotation character |
|
134
|
+
| :comment_regexp | /^#/ | regular expression which matches comment lines (see NOTE about the CSV header) |
|
135
|
+
| :chunk_size | nil | if set, determines the desired chunk-size (defaults to nil, no chunk processing) |
|
136
|
+
| :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
|
137
|
+
| :downcase_header | true | downcase all column headers |
|
138
|
+
| :strings_as_keys | false | use strings instead of symbols as the keys in the result hashes |
|
139
|
+
| :strip_whitespace | true | remove whitespace before/after values and headers |
|
140
|
+
| :remove_empty_values | true | remove values which have nil or empty strings as values |
|
141
|
+
| :remove_zero_values | true | remove values which have a numeric value equal to zero / 0 |
|
142
|
+
| :remove_values_matching | nil | removes key/value pairs if value matches given regular expressions. e.g.: |
|
143
|
+
| | | /^\$0\.0+$/ to match $0.00 , or /^#VALUE!$/ to match errors in Excel spreadsheets |
|
144
|
+
| :convert_values_to_numeric | true | converts strings containing Integers or Floats to the appropriate class |
|
145
|
+
| :remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs |
|
146
|
+
| :user_provided_headers | nil | *careful with that axe!* |
|
147
|
+
| | | user provided Array of header strings or symbols, to define |
|
148
|
+
| | | what headers should be used, overriding any in-file headers. |
|
149
|
+
| | | You cannot combine the :user_provided_headers and :key_mapping options |
|
150
|
+
| :strip_chars_from_headers | nil | remove extraneous characters from the header line (e.g. if the headers are quoted) |
|
151
|
+
| :headers_in_file | true | Whether or not the file contains headers as the first line. |
|
152
|
+
| | | Important if the file does not contain headers, |
|
153
|
+
| | | otherwise you would lose the first line of data. |
|
153
154
|
|
154
155
|
|
155
156
|
#### NOTES about CSV Headers:
|
@@ -193,9 +194,17 @@ Or install it yourself as:
|
|
193
194
|
$ gem install smarter_csv
|
194
195
|
|
195
196
|
|
197
|
+
## Known Bugs
|
198
|
+
|
199
|
+
* if :col_sep (e.g. a comma) appears inside a quoted field, smarter_csv <= 1.0.4 incorrectly splits on that :col_sep
|
200
|
+
|
196
201
|
|
197
202
|
## Changes
|
198
203
|
|
204
|
+
#### 1.0.5 (2013-05-08)
|
205
|
+
|
206
|
+
* bugfix : for :headers_in_file option
|
207
|
+
|
199
208
|
#### 1.0.4 (2012-08-17)
|
200
209
|
|
201
210
|
* renamed the following options:
|
@@ -238,6 +247,17 @@ Or install it yourself as:
|
|
238
247
|
Please [open an Issue on GitHub](https://github.com/tilo/smarter_csv/issues) if you have feedback, new feature requests, or want to report a bug. Thank you!
|
239
248
|
|
240
249
|
|
250
|
+
## Special Thanks
|
251
|
+
|
252
|
+
Many thanks to people who have filed issues and sent comments.
|
253
|
+
And a special thanks to those who contributed pull requests:
|
254
|
+
|
255
|
+
* [Sean Duckett](http://github.com/sduckett)
|
256
|
+
* [Alex Ong](http://github.com/khaong)
|
257
|
+
* [Martin Nilsson](http://github.com/MrTin)
|
258
|
+
* [Eustáquio Rangel](http://github.com/taq)
|
259
|
+
* [Pavel](http://github.com/paxa)
|
260
|
+
|
241
261
|
|
242
262
|
## Contributing
|
243
263
|
|
@@ -31,7 +31,7 @@ module SmarterCSV
|
|
31
31
|
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
32
32
|
# use user-provided headers
|
33
33
|
headerA = options[:user_provided_headers]
|
34
|
-
if defined?(file_header_size)
|
34
|
+
if defined?(file_header_size) && ! file_header_size.nil?
|
35
35
|
if headerA.size != file_header_size
|
36
36
|
raise SmarterCSV::HeaderSizeMismatch , "ERROR [smarter_csv]: :user_provided_headers defines #{headerA.size} headers != CSV-file #{filename} has #{file_header_size} headers"
|
37
37
|
else
|
@@ -62,18 +62,23 @@ module SmarterCSV
|
|
62
62
|
else
|
63
63
|
use_chunks = false
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
# now on to processing all the rest of the lines in the CSV file:
|
67
67
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
68
68
|
line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
|
69
69
|
next if line =~ options[:comment_regexp] # ignore all comment lines if there are any
|
70
70
|
line.chomp! # will use $/ which is set to options[:row_sep]
|
71
|
-
|
72
|
-
dataA = line.split(options[:col_sep])
|
71
|
+
|
72
|
+
dataA = line.split(options[:col_sep]) # ISSUE 4 : BUG : this splits incorrectly if , is inside quoted fields
|
73
73
|
dataA.map!{|x| x.strip} if options[:strip_whitespace]
|
74
74
|
hash = Hash.zip(headerA,dataA) # from Facets of Ruby library
|
75
75
|
# make sure we delete any key/value pairs from the hash, which the user wanted to delete:
|
76
|
-
|
76
|
+
# Note: Ruby < 1.9 doesn't allow empty symbol literals!
|
77
|
+
hash.delete(nil); hash.delete('');
|
78
|
+
if RUBY_VERSION.to_f > 1.8
|
79
|
+
eval('hash.delete(:"")')
|
80
|
+
end
|
81
|
+
|
77
82
|
hash.delete_if{|k,v| v.nil? || v =~ /^\s*$/} if options[:remove_empty_values]
|
78
83
|
hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
|
79
84
|
hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,31 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
5
|
-
prerelease:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.5
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
8
|
-
-
|
6
|
+
authors:
|
7
|
+
- |
|
8
|
+
Tilo Sloboda
|
9
9
|
|
10
|
-
'
|
11
10
|
autorequire:
|
12
11
|
bindir: bin
|
13
12
|
cert_chain: []
|
14
|
-
|
13
|
+
|
14
|
+
date: 2013-05-09 00:00:00 Z
|
15
15
|
dependencies: []
|
16
|
-
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
|
17
|
-
optional features for processing large files in parallel, embedded comments, unusual
|
18
|
-
field- and record-separators, flexible mapping of CSV-headers to Hash-keys
|
19
|
-
email:
|
20
|
-
- ! 'tilo.sloboda@gmail.com
|
21
16
|
|
22
|
-
|
17
|
+
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys
|
18
|
+
email:
|
19
|
+
- |
|
20
|
+
tilo.sloboda@gmail.com
|
21
|
+
|
23
22
|
executables: []
|
23
|
+
|
24
24
|
extensions: []
|
25
|
+
|
25
26
|
extra_rdoc_files: []
|
26
|
-
|
27
|
+
|
28
|
+
files:
|
27
29
|
- .gitignore
|
28
30
|
- .rvmrc
|
29
31
|
- Gemfile
|
@@ -35,29 +37,31 @@ files:
|
|
35
37
|
- lib/smarter_csv/smarter_csv.rb
|
36
38
|
- lib/smarter_csv/version.rb
|
37
39
|
- smarter_csv.gemspec
|
38
|
-
homepage:
|
40
|
+
homepage: ""
|
39
41
|
licenses: []
|
42
|
+
|
43
|
+
metadata: {}
|
44
|
+
|
40
45
|
post_install_message:
|
41
46
|
rdoc_options: []
|
42
|
-
|
47
|
+
|
48
|
+
require_paths:
|
43
49
|
- lib
|
44
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
- !ruby/object:Gem::Version
|
49
|
-
version:
|
50
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
|
52
|
-
|
53
|
-
- - ! '>='
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '0'
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- &id001
|
53
|
+
- ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: "0"
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- *id001
|
56
59
|
requirements: []
|
60
|
+
|
57
61
|
rubyforge_project:
|
58
|
-
rubygems_version:
|
62
|
+
rubygems_version: 2.0.3
|
59
63
|
signing_key:
|
60
|
-
specification_version:
|
61
|
-
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
|
62
|
-
of optional features, e.g. chunked processing for huge CSV files
|
64
|
+
specification_version: 4
|
65
|
+
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files
|
63
66
|
test_files: []
|
67
|
+
|