smarter_csv 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +33 -13
- data/lib/smarter_csv/smarter_csv.rb +36 -13
- data/lib/smarter_csv/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -125,25 +125,38 @@ or an Array of Arrays, which contain Hashes, or processes Chunks of Hashes via a
|
|
125
125
|
The options and the block are optional.
|
126
126
|
|
127
127
|
`SmarterCSV.process` supports the following options:
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
128
|
+
|
129
|
+
Option | Default | Explanation
|
130
|
+
-----------------------------+----------+--------------------------------------------------------------
|
131
|
+
:col_sep | ',' | column separator
|
132
|
+
:row_sep | $/ ,"\n" | row separator or record separator , defaults to system's $/ , which defaults to "\n"
|
133
|
+
:quote_char | '"' | quotation character
|
134
|
+
:comment_regexp | /^#/ | regular expression which matches comment lines (see NOTE about the CSV header)
|
135
|
+
:chunk_size | nil | if set, determines the desired chunk-size (defaults to nil, no chunk processing)
|
136
|
+
:key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash
|
137
|
+
:downcase_header | true | downcase all column headers
|
138
|
+
:strings_as_keys | false | use strings instead of symbols as the keys in the result hashes
|
139
|
+
:remove_empty_values | true | remove values which have nil or empty strings as values
|
140
|
+
:remove_zero_values | false | remove values which have a numeric value equal to zero / 0
|
141
|
+
:remove_values_matching | nil | removes key/value pairs if value matches given regular expressions. e.g.:
|
142
|
+
| | /^\$0\.0+$/ to match $0.00 , or /^#VALUE!$/ to match errors in Excel spreadsheets
|
143
|
+
:convert_values_to_numeric | true | converts strings containing Integers or Floats to the appropriate class
|
144
|
+
:remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs
|
145
|
+
:user_provided_headers | nil | user provided Array of header strings or symbols, to define
|
146
|
+
| | what headers should be used, overriding any in-file headers. (dangerous)
|
147
|
+
| | You cannot combine the :user_provided_headers and :key_mapping options
|
148
|
+
:headers_in_file | true | Whether or not the file contains headers as the first line.
|
149
|
+
| | Important if the file does not contain headers,
|
150
|
+
| | otherwise you would lose the first line of data.
|
151
|
+
|
141
152
|
|
142
153
|
#### NOTES about CSV Headers:
|
143
154
|
* as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
|
144
155
|
* the first line with the CSV header may or may not be commented out according to the :comment_regexp
|
145
156
|
* any occurrences of :comment_regexp or :row_sep will be stripped from the first line with the CSV header
|
146
157
|
* any of the keys in the header line will be downcased, spaces replaced by underscore, and converted to Ruby symbols before being used as keys in the returned Hashes
|
158
|
+
* you cannot combine the :user_provided_headers and :key_mapping options
|
159
|
+
* if an incorrect number of headers is provided via :user_provided_headers, the exception SmarterCSV::HeaderSizeMismatch is raised
|
147
160
|
|
148
161
|
#### NOTES on Key Mapping:
|
149
162
|
* keys in the header line of the file can be re-mapped to a chosen set of symbols, so the resulting Hashes can be better used internally in your application (e.g. when directly creating MongoDB entries with them)
|
@@ -178,8 +191,15 @@ Or install it yourself as:
|
|
178
191
|
$ gem install smarter_csv
|
179
192
|
|
180
193
|
|
194
|
+
|
181
195
|
## Changes
|
182
196
|
|
197
|
+
#### 1.0.2 (2012-08-02)
|
198
|
+
|
199
|
+
* added more options for dealing with headers:
|
200
|
+
* :user_provided_headers , a user-provided Array of header strings or symbols, to precisely define what the headers should be, overriding any in-file headers (default: nil)
|
201
|
+
* :headers_in_file , if the file contains headers as the first line (default: true)
|
202
|
+
|
183
203
|
#### 1.0.1 (2012-07-30)
|
184
204
|
|
185
205
|
* added the following options:
|
@@ -1,8 +1,12 @@
|
|
1
1
|
module SmarterCSV
|
2
|
+
|
3
|
+
class HeaderSizeMismatch < Exception
|
4
|
+
end
|
5
|
+
|
2
6
|
def SmarterCSV.process(filename, options={}, &block)
|
3
7
|
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"',
|
4
8
|
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true ,
|
5
|
-
:convert_values_to_numeric => true, :strip_chars_from_headers => nil ,
|
9
|
+
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
6
10
|
:comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false
|
7
11
|
}
|
8
12
|
options = default_options.merge(options)
|
@@ -12,20 +16,39 @@ module SmarterCSV
|
|
12
16
|
begin
|
13
17
|
$/ = options[:row_sep]
|
14
18
|
f = File.open(filename, "r")
|
15
|
-
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
|
20
|
+
if options[:headers_in_file] # extract the header line
|
21
|
+
# process the header line in the CSV file..
|
22
|
+
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
23
|
+
header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
|
24
|
+
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
25
|
+
file_headerA = header.split(options[:col_sep]).map{|x| x.gsub(%r/options[:quote_char]/,'').gsub(/\s+/,'_')}
|
26
|
+
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
27
|
+
file_header_size = file_headerA.size
|
28
|
+
end
|
29
|
+
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
30
|
+
# use user-provided headers
|
31
|
+
headerA = options[:user_provided_headers]
|
32
|
+
if defined?(file_header_size)
|
33
|
+
if headerA.size != file_header_size
|
34
|
+
raise SmarterCSV::HeaderSizeMismatch , "ERROR [smarter_csv]: :user_provided_headers defines #{headerA.size} headers != CSV-file #{filename} has #{file_header_size} headers"
|
35
|
+
else
|
36
|
+
# we could print out the mapping of file_headerA to headerA here
|
37
|
+
end
|
38
|
+
end
|
39
|
+
else
|
40
|
+
headerA = file_headerA
|
41
|
+
end
|
22
42
|
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys]
|
23
|
-
key_mappingH = options[:key_mapping]
|
24
43
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
44
|
+
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
45
|
+
key_mappingH = options[:key_mapping]
|
46
|
+
|
47
|
+
# do some key mapping on the keys in the file header
|
48
|
+
# if you want to completely delete a key, then map it to nil or to ''
|
49
|
+
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
50
|
+
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x].to_sym) : x}
|
51
|
+
end
|
29
52
|
end
|
30
53
|
|
31
54
|
# in case we use chunking.. we'll need to set it up..
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-08-02 00:00:00.000000000 Z
|
15
15
|
dependencies: []
|
16
16
|
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
|
17
17
|
optional features for processing large files in parallel, embedded comments, unusual
|