smarter_csv 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +33 -13
- data/lib/smarter_csv/smarter_csv.rb +36 -13
- data/lib/smarter_csv/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -125,25 +125,38 @@ or an Array of Arrays, which contain Hashes, or processes Chunks of Hashes via a
|
|
125
125
|
The options and the block are optional.
|
126
126
|
|
127
127
|
`SmarterCSV.process` supports the following options:
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
128
|
+
|
129
|
+
Option | Default | Explanation
|
130
|
+
-----------------------------+----------+--------------------------------------------------------------
|
131
|
+
:col_sep | ',' | column separator
|
132
|
+
:row_sep | $/ , "\n" | row separator (record separator); defaults to the system's $/ , which itself defaults to "\n"
|
133
|
+
:quote_char | '"' | quotation character
|
134
|
+
:comment_regexp | /^#/ | regular expression which matches comment lines (see NOTE about the CSV header)
|
135
|
+
:chunk_size | nil | if set, determines the desired chunk-size (defaults to nil, no chunk processing)
|
136
|
+
:key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash
|
137
|
+
:downcase_header | true | downcase all column headers
|
138
|
+
:strings_as_keys | false | use strings instead of symbols as the keys in the result hashes
|
139
|
+
:remove_empty_values | true | remove values which have nil or empty strings as values
|
140
|
+
:remove_zero_values | false | remove values which have a numeric value equal to zero / 0
|
141
|
+
:remove_values_matching | nil | removes key/value pairs if value matches given regular expressions. e.g.:
|
142
|
+
| | /^\$0\.0+$/ to match $0.00 , or /^#VALUE!$/ to match errors in Excel spreadsheets
|
143
|
+
:convert_values_to_numeric | true | converts strings containing Integers or Floats to the appropriate class
|
144
|
+
:remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs
|
145
|
+
:user_provided_headers | nil | user provided Array of header strings or symbols, to define
|
146
|
+
| | what headers should be used, overriding any in-file headers. (dangerous)
|
147
|
+
| | You cannot combine the :user_provided_headers and :key_mapping options
|
148
|
+
:headers_in_file | true | Whether or not the file contains headers as the first line.
|
149
|
+
| | Important if the file does not contain headers,
|
150
|
+
| | otherwise you would lose the first line of data.
|
151
|
+
|
141
152
|
|
142
153
|
#### NOTES about CSV Headers:
|
143
154
|
* as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
|
144
155
|
* the first line with the CSV header may or may not be commented out according to the :comment_regexp
|
145
156
|
* any occurrences of :comment_regexp or :row_sep will be stripped from the first line with the CSV header
|
146
157
|
* any of the keys in the header line will be downcased, spaces replaced by underscore, and converted to Ruby symbols before being used as keys in the returned Hashes
|
158
|
+
* you cannot combine the :user_provided_headers and :key_mapping options
|
159
|
+
* if an incorrect number of headers is provided via :user_provided_headers, the exception SmarterCSV::HeaderSizeMismatch is raised
|
147
160
|
|
148
161
|
#### NOTES on Key Mapping:
|
149
162
|
* keys in the header line of the file can be re-mapped to a chosen set of symbols, so the resulting Hashes can be better used internally in your application (e.g. when directly creating MongoDB entries with them)
|
@@ -178,8 +191,15 @@ Or install it yourself as:
|
|
178
191
|
$ gem install smarter_csv
|
179
192
|
|
180
193
|
|
194
|
+
|
181
195
|
## Changes
|
182
196
|
|
197
|
+
#### 1.0.2 (2012-08-02)
|
198
|
+
|
199
|
+
* added more options for dealing with headers:
|
200
|
+
* :user_provided_headers , a user-provided Array of header strings or symbols, to precisely define what the headers should be, overriding any in-file headers (default: nil)
|
201
|
+
* :headers_in_file , whether or not the file contains headers as the first line (default: true)
|
202
|
+
|
183
203
|
#### 1.0.1 (2012-07-30)
|
184
204
|
|
185
205
|
* added the following options:
|
@@ -1,8 +1,12 @@
|
|
1
1
|
module SmarterCSV
|
2
|
+
|
3
|
+
class HeaderSizeMismatch < Exception
|
4
|
+
end
|
5
|
+
|
2
6
|
def SmarterCSV.process(filename, options={}, &block)
|
3
7
|
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"',
|
4
8
|
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true ,
|
5
|
-
:convert_values_to_numeric => true, :strip_chars_from_headers => nil ,
|
9
|
+
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
6
10
|
:comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false
|
7
11
|
}
|
8
12
|
options = default_options.merge(options)
|
@@ -12,20 +16,39 @@ module SmarterCSV
|
|
12
16
|
begin
|
13
17
|
$/ = options[:row_sep]
|
14
18
|
f = File.open(filename, "r")
|
15
|
-
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
|
20
|
+
if options[:headers_in_file] # extract the header line
|
21
|
+
# process the header line in the CSV file..
|
22
|
+
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
23
|
+
header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
|
24
|
+
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
25
|
+
file_headerA = header.split(options[:col_sep]).map{|x| x.gsub(%r/options[:quote_char]/,'').gsub(/\s+/,'_')}
|
26
|
+
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
27
|
+
file_header_size = file_headerA.size
|
28
|
+
end
|
29
|
+
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
30
|
+
# use user-provided headers
|
31
|
+
headerA = options[:user_provided_headers]
|
32
|
+
if defined?(file_header_size)
|
33
|
+
if headerA.size != file_header_size
|
34
|
+
raise SmarterCSV::HeaderSizeMismatch , "ERROR [smarter_csv]: :user_provided_headers defines #{headerA.size} headers != CSV-file #{filename} has #{file_header_size} headers"
|
35
|
+
else
|
36
|
+
# we could print out the mapping of file_headerA to headerA here
|
37
|
+
end
|
38
|
+
end
|
39
|
+
else
|
40
|
+
headerA = file_headerA
|
41
|
+
end
|
22
42
|
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys]
|
23
|
-
key_mappingH = options[:key_mapping]
|
24
43
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
44
|
+
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
45
|
+
key_mappingH = options[:key_mapping]
|
46
|
+
|
47
|
+
# do some key mapping on the keys in the file header
|
48
|
+
# if you want to completely delete a key, then map it to nil or to ''
|
49
|
+
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
50
|
+
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x].to_sym) : x}
|
51
|
+
end
|
29
52
|
end
|
30
53
|
|
31
54
|
# in case we use chunking.. we'll need to set it up..
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-08-02 00:00:00.000000000 Z
|
15
15
|
dependencies: []
|
16
16
|
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
|
17
17
|
optional features for processing large files in parallel, embedded comments, unusual
|