smarter_csv 1.0.7 → 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +11 -0
- data/lib/smarter_csv/smarter_csv.rb +9 -5
- data/lib/smarter_csv/version.rb +1 -1
- metadata +36 -33
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4426aea4be447cb000436980c4b8f34ac12d9683
|
4
|
+
data.tar.gz: 5f15ed8a24db46241f02e672d099413445011238
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fbf06328d2e51caa2e66ba913b7a6b49d0db4d37caf932a7f66846db3a47207adc066be4d7df65c6b215bf20f3d6d5713f85b0c1e3e5cd1f930ca573949c69ff
|
7
|
+
data.tar.gz: e098866b5d33c254136854853f5bd55bac89bcae93f286365518986a4d254d17ea116004a5367c5538a8dab6d8854bd6383109aac2889b503c20f86cc47c8f84
|
data/README.md
CHANGED
@@ -152,6 +152,9 @@ The options and the block are optional.
|
|
152
152
|
| | | Important if the file does not contain headers, |
|
153
153
|
| | | otherwise you would lose the first line of data. |
|
154
154
|
| :file_encoding | utf-8 | Set the file encoding, e.g. 'windows-1252' or 'iso-8859-1' |
|
155
|
+
| :force_simple_split | false | force simple splitting on :col_sep character for non-standard CSV-files. |
|
156
|
+
| | | e.g. when :quote_char is not properly escaped |
|
157
|
+
| :verbose | false | print out line number while processing (to track down problems in input files) |
|
155
158
|
|
156
159
|
|
157
160
|
#### NOTES about CSV Headers:
|
@@ -197,6 +200,13 @@ Or install it yourself as:
|
|
197
200
|
|
198
201
|
## Changes
|
199
202
|
|
203
|
+
#### 1.0.8 (2013-06-01)
|
204
|
+
|
205
|
+
* bugfix : fixed issue with nil values in inputs with quote-char (thanks to Félix Bellanger)
|
206
|
+
* new options:
|
207
|
+
* :force_simple_split : to force simple splitting on :col_sep character for non-standard CSV-files. e.g. without properly escaped :quote_char
|
208
|
+
* :verbose : print out line number while processing (to track down problems in input files)
|
209
|
+
|
200
210
|
#### 1.0.7 (2013-05-20)
|
201
211
|
|
202
212
|
* allowing process to work with objects with a 'readline' method (thanks to taq)
|
@@ -263,6 +273,7 @@ And a special thanks to those who contributed pull requests:
|
|
263
273
|
* [Martin Nilsson](http://github.com/MrTin)
|
264
274
|
* [Eustáquio Rangel](http://github.com/taq)
|
265
275
|
* [Pavel](http://github.com/paxa)
|
276
|
+
* [Félix Bellanger](https://github.com/Keeguon)
|
266
277
|
|
267
278
|
|
268
279
|
## Contributing
|
@@ -4,7 +4,7 @@ module SmarterCSV
|
|
4
4
|
end
|
5
5
|
|
6
6
|
def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
|
7
|
-
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"',
|
7
|
+
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
|
8
8
|
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
9
9
|
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
10
10
|
:comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8'
|
@@ -13,6 +13,7 @@ module SmarterCSV
|
|
13
13
|
headerA = []
|
14
14
|
result = []
|
15
15
|
old_row_sep = $/
|
16
|
+
line_count = 0
|
16
17
|
begin
|
17
18
|
$/ = options[:row_sep]
|
18
19
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
@@ -21,9 +22,10 @@ module SmarterCSV
|
|
21
22
|
# process the header line in the CSV file..
|
22
23
|
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
23
24
|
header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
|
25
|
+
line_count += 1
|
24
26
|
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
25
|
-
if header =~ %r{#{options[:quote_char]}}
|
26
|
-
file_headerA = CSV.parse( header ).flatten
|
27
|
+
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
28
|
+
file_headerA = CSV.parse( header ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
27
29
|
else
|
28
30
|
file_headerA = header.split(options[:col_sep])
|
29
31
|
end
|
@@ -71,11 +73,13 @@ module SmarterCSV
|
|
71
73
|
# now on to processing all the rest of the lines in the CSV file:
|
72
74
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
73
75
|
line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
|
76
|
+
line_count += 1
|
77
|
+
print "processing line %10d\r" % line_count if options[:verbose]
|
74
78
|
next if line =~ options[:comment_regexp] # ignore all comment lines if there are any
|
75
79
|
line.chomp! # will use $/ which is set to options[:row_sep]
|
76
80
|
|
77
|
-
if line =~ %r{#{options[:quote_char]}}
|
78
|
-
dataA = CSV.parse( line ).flatten
|
81
|
+
if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
82
|
+
dataA = CSV.parse( line ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
79
83
|
else
|
80
84
|
dataA = line.split(options[:col_sep])
|
81
85
|
end
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,31 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.7
|
5
|
-
prerelease:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.8
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
8
|
-
-
|
6
|
+
authors:
|
7
|
+
- |
|
8
|
+
Tilo Sloboda
|
9
9
|
|
10
|
-
'
|
11
10
|
autorequire:
|
12
11
|
bindir: bin
|
13
12
|
cert_chain: []
|
14
|
-
|
13
|
+
|
14
|
+
date: 2013-06-01 00:00:00 Z
|
15
15
|
dependencies: []
|
16
|
-
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
|
17
|
-
optional features for processing large files in parallel, embedded comments, unusual
|
18
|
-
field- and record-separators, flexible mapping of CSV-headers to Hash-keys
|
19
|
-
email:
|
20
|
-
- ! 'tilo.sloboda@gmail.com
|
21
16
|
|
22
|
-
|
17
|
+
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys
|
18
|
+
email:
|
19
|
+
- |
|
20
|
+
tilo.sloboda@gmail.com
|
21
|
+
|
23
22
|
executables: []
|
23
|
+
|
24
24
|
extensions: []
|
25
|
+
|
25
26
|
extra_rdoc_files: []
|
26
|
-
|
27
|
+
|
28
|
+
files:
|
27
29
|
- .gitignore
|
28
30
|
- .rvmrc
|
29
31
|
- Gemfile
|
@@ -37,28 +39,29 @@ files:
|
|
37
39
|
- smarter_csv.gemspec
|
38
40
|
homepage: https://github.com/tilo/smarter_csv
|
39
41
|
licenses: []
|
42
|
+
|
43
|
+
metadata: {}
|
44
|
+
|
40
45
|
post_install_message:
|
41
46
|
rdoc_options: []
|
42
|
-
|
47
|
+
|
48
|
+
require_paths:
|
43
49
|
- lib
|
44
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
- !ruby/object:Gem::Version
|
49
|
-
version:
|
50
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '0'
|
56
|
-
requirements:
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- &id001
|
53
|
+
- ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: "0"
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- *id001
|
59
|
+
requirements:
|
57
60
|
- csv
|
58
61
|
rubyforge_project:
|
59
|
-
rubygems_version:
|
62
|
+
rubygems_version: 2.0.3
|
60
63
|
signing_key:
|
61
|
-
specification_version:
|
62
|
-
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
|
63
|
-
of optional features, e.g. chunked processing for huge CSV files
|
64
|
+
specification_version: 4
|
65
|
+
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files
|
64
66
|
test_files: []
|
67
|
+
|