smarter_csv 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4426aea4be447cb000436980c4b8f34ac12d9683
4
+ data.tar.gz: 5f15ed8a24db46241f02e672d099413445011238
5
+ SHA512:
6
+ metadata.gz: fbf06328d2e51caa2e66ba913b7a6b49d0db4d37caf932a7f66846db3a47207adc066be4d7df65c6b215bf20f3d6d5713f85b0c1e3e5cd1f930ca573949c69ff
7
+ data.tar.gz: e098866b5d33c254136854853f5bd55bac89bcae93f286365518986a4d254d17ea116004a5367c5538a8dab6d8854bd6383109aac2889b503c20f86cc47c8f84
data/README.md CHANGED
@@ -152,6 +152,9 @@ The options and the block are optional.
152
152
  | | | Important if the file does not contain headers, |
153
153
  | | | otherwise you would lose the first line of data. |
154
154
  | :file_encoding | utf-8 | Set the file encoding, e.g. 'windows-1252' or 'iso-8859-1' |
155
+ | :force_simple_split | false | force simple splitting on :col_sep character for non-standard CSV-files. |
156
+ | | | e.g. when :quote_char is not properly escaped |
157
+ | :verbose | false | print out line number while processing (to track down problems in input files) |
155
158
 
156
159
 
157
160
  #### NOTES about CSV Headers:
@@ -197,6 +200,13 @@ Or install it yourself as:
197
200
 
198
201
  ## Changes
199
202
 
203
+ #### 1.0.8 (2013-06-01)
204
+
205
+ * bugfix : fixed issue with nil values in inputs with quote-char (thanks to Félix Bellanger)
206
+ * new options:
207
+ * :force_simple_split : to force simple splitting on :col_sep character for non-standard CSV-files. e.g. without properly escaped :quote_char
208
+ * :verbose : print out line number while processing (to track down problems in input files)
209
+
200
210
  #### 1.0.7 (2013-05-20)
201
211
 
202
212
  * allowing process to work with objects with a 'readline' method (thanks to taq)
@@ -263,6 +273,7 @@ And a special thanks to those who contributed pull requests:
263
273
  * [Martin Nilsson](http://github.com/MrTin)
264
274
  * [Eustáquio Rangel](http://github.com/taq)
265
275
  * [Pavel](http://github.com/paxa)
276
+ * [Félix Bellanger](https://github.com/Keeguon)
266
277
 
267
278
 
268
279
  ## Contributing
@@ -4,7 +4,7 @@ module SmarterCSV
4
4
  end
5
5
 
6
6
  def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
7
- default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"',
7
+ default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
8
8
  :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
9
9
  :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
10
10
  :comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8'
@@ -13,6 +13,7 @@ module SmarterCSV
13
13
  headerA = []
14
14
  result = []
15
15
  old_row_sep = $/
16
+ line_count = 0
16
17
  begin
17
18
  $/ = options[:row_sep]
18
19
  f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
@@ -21,9 +22,10 @@ module SmarterCSV
21
22
  # process the header line in the CSV file..
22
23
  # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
23
24
  header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
25
+ line_count += 1
24
26
  header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
25
- if header =~ %r{#{options[:quote_char]}}
26
- file_headerA = CSV.parse( header ).flatten
27
+ if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
28
+ file_headerA = CSV.parse( header ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
27
29
  else
28
30
  file_headerA = header.split(options[:col_sep])
29
31
  end
@@ -71,11 +73,13 @@ module SmarterCSV
71
73
  # now on to processing all the rest of the lines in the CSV file:
72
74
  while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
73
75
  line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
76
+ line_count += 1
77
+ print "processing line %10d\r" % line_count if options[:verbose]
74
78
  next if line =~ options[:comment_regexp] # ignore all comment lines if there are any
75
79
  line.chomp! # will use $/ which is set to options[:row_sep]
76
80
 
77
- if line =~ %r{#{options[:quote_char]}}
78
- dataA = CSV.parse( line ).flatten
81
+ if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
82
+ dataA = CSV.parse( line ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
79
83
  else
80
84
  dataA = line.split(options[:col_sep])
81
85
  end
@@ -1,3 +1,3 @@
1
1
  module SmarterCSV
2
- VERSION = "1.0.7"
2
+ VERSION = "1.0.8"
3
3
  end
metadata CHANGED
@@ -1,29 +1,31 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
- version: !ruby/object:Gem::Version
4
- version: 1.0.7
5
- prerelease:
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.8
6
5
  platform: ruby
7
- authors:
8
- - ! 'Tilo Sloboda
6
+ authors:
7
+ - |
8
+ Tilo Sloboda
9
9
 
10
- '
11
10
  autorequire:
12
11
  bindir: bin
13
12
  cert_chain: []
14
- date: 2013-05-20 00:00:00.000000000 Z
13
+
14
+ date: 2013-06-01 00:00:00 Z
15
15
  dependencies: []
16
- description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
17
- optional features for processing large files in parallel, embedded comments, unusual
18
- field- and record-separators, flexible mapping of CSV-headers to Hash-keys
19
- email:
20
- - ! 'tilo.sloboda@gmail.com
21
16
 
22
- '
17
+ description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys
18
+ email:
19
+ - |
20
+ tilo.sloboda@gmail.com
21
+
23
22
  executables: []
23
+
24
24
  extensions: []
25
+
25
26
  extra_rdoc_files: []
26
- files:
27
+
28
+ files:
27
29
  - .gitignore
28
30
  - .rvmrc
29
31
  - Gemfile
@@ -37,28 +39,29 @@ files:
37
39
  - smarter_csv.gemspec
38
40
  homepage: https://github.com/tilo/smarter_csv
39
41
  licenses: []
42
+
43
+ metadata: {}
44
+
40
45
  post_install_message:
41
46
  rdoc_options: []
42
- require_paths:
47
+
48
+ require_paths:
43
49
  - lib
44
- required_ruby_version: !ruby/object:Gem::Requirement
45
- none: false
46
- requirements:
47
- - - ! '>='
48
- - !ruby/object:Gem::Version
49
- version: '0'
50
- required_rubygems_version: !ruby/object:Gem::Requirement
51
- none: false
52
- requirements:
53
- - - ! '>='
54
- - !ruby/object:Gem::Version
55
- version: '0'
56
- requirements:
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - &id001
53
+ - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - *id001
59
+ requirements:
57
60
  - csv
58
61
  rubyforge_project:
59
- rubygems_version: 1.8.15
62
+ rubygems_version: 2.0.3
60
63
  signing_key:
61
- specification_version: 3
62
- summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
63
- of optional features, e.g. chunked processing for huge CSV files
64
+ specification_version: 4
65
+ summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files
64
66
  test_files: []
67
+