smarter_csv 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +11 -0
- data/lib/smarter_csv/smarter_csv.rb +9 -5
- data/lib/smarter_csv/version.rb +1 -1
- metadata +36 -33
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4426aea4be447cb000436980c4b8f34ac12d9683
|
4
|
+
data.tar.gz: 5f15ed8a24db46241f02e672d099413445011238
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fbf06328d2e51caa2e66ba913b7a6b49d0db4d37caf932a7f66846db3a47207adc066be4d7df65c6b215bf20f3d6d5713f85b0c1e3e5cd1f930ca573949c69ff
|
7
|
+
data.tar.gz: e098866b5d33c254136854853f5bd55bac89bcae93f286365518986a4d254d17ea116004a5367c5538a8dab6d8854bd6383109aac2889b503c20f86cc47c8f84
|
data/README.md
CHANGED
@@ -152,6 +152,9 @@ The options and the block are optional.
|
|
152
152
|
| | | Important if the file does not contain headers, |
|
153
153
|
| | | otherwise you would lose the first line of data. |
|
154
154
|
| :file_encoding | utf-8 | Set the file encoding eg.: 'windows-1252' or 'iso-8859-1' |
|
155
|
+
| :force_simple_split | false | force simple splitting on :col_sep character for non-standard CSV-files. |
|
156
|
+
| | | e.g. when :quote_char is not properly escaped |
|
157
|
+
| :verbose | false | print out line number while processing (to track down problems in input files) |
|
155
158
|
|
156
159
|
|
157
160
|
#### NOTES about CSV Headers:
|
@@ -197,6 +200,13 @@ Or install it yourself as:
|
|
197
200
|
|
198
201
|
## Changes
|
199
202
|
|
203
|
+
#### 1.0.8 (2013-06-01)
|
204
|
+
|
205
|
+
* bugfix : fixed issue with nil values in inputs with quote-char (thanks to Félix Bellanger)
|
206
|
+
* new options:
|
207
|
+
* :force_simple_split : to force simple splitting on :col_sep character for non-standard CSV-files. e.g. without properly escaped :quote_char
|
208
|
+
* :verbose : print out line number while processing (to track down problems in input files)
|
209
|
+
|
200
210
|
#### 1.0.7 (2013-05-20)
|
201
211
|
|
202
212
|
* allowing process to work with objects with a 'readline' method (thanks to taq)
|
@@ -263,6 +273,7 @@ And a special thanks to those who contributed pull requests:
|
|
263
273
|
* [Martin Nilsson](http://github.com/MrTin)
|
264
274
|
* [Eustáquio Rangel](http://github.com/taq)
|
265
275
|
* [Pavel](http://github.com/paxa)
|
276
|
+
* [Félix Bellanger](https://github.com/Keeguon)
|
266
277
|
|
267
278
|
|
268
279
|
## Contributing
|
@@ -4,7 +4,7 @@ module SmarterCSV
|
|
4
4
|
end
|
5
5
|
|
6
6
|
def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
|
7
|
-
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"',
|
7
|
+
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
|
8
8
|
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
9
9
|
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
10
10
|
:comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8'
|
@@ -13,6 +13,7 @@ module SmarterCSV
|
|
13
13
|
headerA = []
|
14
14
|
result = []
|
15
15
|
old_row_sep = $/
|
16
|
+
line_count = 0
|
16
17
|
begin
|
17
18
|
$/ = options[:row_sep]
|
18
19
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
@@ -21,9 +22,10 @@ module SmarterCSV
|
|
21
22
|
# process the header line in the CSV file..
|
22
23
|
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
23
24
|
header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
|
25
|
+
line_count += 1
|
24
26
|
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
25
|
-
if header =~ %r{#{options[:quote_char]}}
|
26
|
-
file_headerA = CSV.parse( header ).flatten
|
27
|
+
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
28
|
+
file_headerA = CSV.parse( header ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
27
29
|
else
|
28
30
|
file_headerA = header.split(options[:col_sep])
|
29
31
|
end
|
@@ -71,11 +73,13 @@ module SmarterCSV
|
|
71
73
|
# now on to processing all the rest of the lines in the CSV file:
|
72
74
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
73
75
|
line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
|
76
|
+
line_count += 1
|
77
|
+
print "processing line %10d\r" % line_count if options[:verbose]
|
74
78
|
next if line =~ options[:comment_regexp] # ignore all comment lines if there are any
|
75
79
|
line.chomp! # will use $/ which is set to options[:row_sep]
|
76
80
|
|
77
|
-
if line =~ %r{#{options[:quote_char]}}
|
78
|
-
dataA = CSV.parse( line ).flatten
|
81
|
+
if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
82
|
+
dataA = CSV.parse( line ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
79
83
|
else
|
80
84
|
dataA = line.split(options[:col_sep])
|
81
85
|
end
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,31 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
5
|
-
prerelease:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.8
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
8
|
-
-
|
6
|
+
authors:
|
7
|
+
- |
|
8
|
+
Tilo Sloboda
|
9
9
|
|
10
|
-
'
|
11
10
|
autorequire:
|
12
11
|
bindir: bin
|
13
12
|
cert_chain: []
|
14
|
-
|
13
|
+
|
14
|
+
date: 2013-06-01 00:00:00 Z
|
15
15
|
dependencies: []
|
16
|
-
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
|
17
|
-
optional features for processing large files in parallel, embedded comments, unusual
|
18
|
-
field- and record-separators, flexible mapping of CSV-headers to Hash-keys
|
19
|
-
email:
|
20
|
-
- ! 'tilo.sloboda@gmail.com
|
21
16
|
|
22
|
-
|
17
|
+
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys
|
18
|
+
email:
|
19
|
+
- |
|
20
|
+
tilo.sloboda@gmail.com
|
21
|
+
|
23
22
|
executables: []
|
23
|
+
|
24
24
|
extensions: []
|
25
|
+
|
25
26
|
extra_rdoc_files: []
|
26
|
-
|
27
|
+
|
28
|
+
files:
|
27
29
|
- .gitignore
|
28
30
|
- .rvmrc
|
29
31
|
- Gemfile
|
@@ -37,28 +39,29 @@ files:
|
|
37
39
|
- smarter_csv.gemspec
|
38
40
|
homepage: https://github.com/tilo/smarter_csv
|
39
41
|
licenses: []
|
42
|
+
|
43
|
+
metadata: {}
|
44
|
+
|
40
45
|
post_install_message:
|
41
46
|
rdoc_options: []
|
42
|
-
|
47
|
+
|
48
|
+
require_paths:
|
43
49
|
- lib
|
44
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
- !ruby/object:Gem::Version
|
49
|
-
version:
|
50
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '0'
|
56
|
-
requirements:
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- &id001
|
53
|
+
- ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: "0"
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- *id001
|
59
|
+
requirements:
|
57
60
|
- csv
|
58
61
|
rubyforge_project:
|
59
|
-
rubygems_version:
|
62
|
+
rubygems_version: 2.0.3
|
60
63
|
signing_key:
|
61
|
-
specification_version:
|
62
|
-
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
|
63
|
-
of optional features, e.g. chunked processing for huge CSV files
|
64
|
+
specification_version: 4
|
65
|
+
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files
|
64
66
|
test_files: []
|
67
|
+
|