smarter_csv 1.0.7 → 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4426aea4be447cb000436980c4b8f34ac12d9683
4
+ data.tar.gz: 5f15ed8a24db46241f02e672d099413445011238
5
+ SHA512:
6
+ metadata.gz: fbf06328d2e51caa2e66ba913b7a6b49d0db4d37caf932a7f66846db3a47207adc066be4d7df65c6b215bf20f3d6d5713f85b0c1e3e5cd1f930ca573949c69ff
7
+ data.tar.gz: e098866b5d33c254136854853f5bd55bac89bcae93f286365518986a4d254d17ea116004a5367c5538a8dab6d8854bd6383109aac2889b503c20f86cc47c8f84
data/README.md CHANGED
@@ -152,6 +152,9 @@ The options and the block are optional.
152
152
  | | | Important if the file does not contain headers, |
153
153
  | | | otherwise you would lose the first line of data. |
154
154
  | :file_encoding | utf-8 | Set the file encoding eg.: 'windows-1252' or 'iso-8859-1' |
155
+ | :force_simple_split | false | force simple splitting on :col_sep character for non-standard CSV-files. |
156
+ | | | e.g. when :quote_char is not properly escaped |
157
+ | :verbose | false | print out line number while processing (to track down problems in input files) |
155
158
 
156
159
 
157
160
  #### NOTES about CSV Headers:
@@ -197,6 +200,13 @@ Or install it yourself as:
197
200
 
198
201
  ## Changes
199
202
 
203
+ #### 1.0.8 (2013-06-01)
204
+
205
+ * bugfix : fixed issue with nil values in inputs with quote-char (thanks to Félix Bellanger)
206
+ * new options:
207
+ * :force_simple_split : to force simple splitting on :col_sep character for non-standard CSV-files. e.g. without properly escaped :quote_char
208
+ * :verbose : print out line number while processing (to track down problems in input files)
209
+
200
210
  #### 1.0.7 (2013-05-20)
201
211
 
202
212
  * allowing process to work with objects with a 'readline' method (thanks to taq)
@@ -263,6 +273,7 @@ And a special thanks to those who contributed pull requests:
263
273
  * [Martin Nilsson](http://github.com/MrTin)
264
274
  * [Eustáquio Rangel](http://github.com/taq)
265
275
  * [Pavel](http://github.com/paxa)
276
+ * [Félix Bellanger](https://github.com/Keeguon)
266
277
 
267
278
 
268
279
  ## Contributing
@@ -4,7 +4,7 @@ module SmarterCSV
4
4
  end
5
5
 
6
6
  def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
7
- default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"',
7
+ default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
8
8
  :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
9
9
  :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
10
10
  :comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8'
@@ -13,6 +13,7 @@ module SmarterCSV
13
13
  headerA = []
14
14
  result = []
15
15
  old_row_sep = $/
16
+ line_count = 0
16
17
  begin
17
18
  $/ = options[:row_sep]
18
19
  f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
@@ -21,9 +22,10 @@ module SmarterCSV
21
22
  # process the header line in the CSV file..
22
23
  # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
23
24
  header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
25
+ line_count += 1
24
26
  header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
25
- if header =~ %r{#{options[:quote_char]}}
26
- file_headerA = CSV.parse( header ).flatten
27
+ if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
28
+ file_headerA = CSV.parse( header ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
27
29
  else
28
30
  file_headerA = header.split(options[:col_sep])
29
31
  end
@@ -71,11 +73,13 @@ module SmarterCSV
71
73
  # now on to processing all the rest of the lines in the CSV file:
72
74
  while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
73
75
  line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
76
+ line_count += 1
77
+ print "processing line %10d\r" % line_count if options[:verbose]
74
78
  next if line =~ options[:comment_regexp] # ignore all comment lines if there are any
75
79
  line.chomp! # will use $/ which is set to options[:row_sep]
76
80
 
77
- if line =~ %r{#{options[:quote_char]}}
78
- dataA = CSV.parse( line ).flatten
81
+ if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
82
+ dataA = CSV.parse( line ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
79
83
  else
80
84
  dataA = line.split(options[:col_sep])
81
85
  end
@@ -1,3 +1,3 @@
1
1
  module SmarterCSV
2
- VERSION = "1.0.7"
2
+ VERSION = "1.0.8"
3
3
  end
metadata CHANGED
@@ -1,29 +1,31 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
- version: !ruby/object:Gem::Version
4
- version: 1.0.7
5
- prerelease:
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.8
6
5
  platform: ruby
7
- authors:
8
- - ! 'Tilo Sloboda
6
+ authors:
7
+ - |
8
+ Tilo Sloboda
9
9
 
10
- '
11
10
  autorequire:
12
11
  bindir: bin
13
12
  cert_chain: []
14
- date: 2013-05-20 00:00:00.000000000 Z
13
+
14
+ date: 2013-06-01 00:00:00 Z
15
15
  dependencies: []
16
- description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
17
- optional features for processing large files in parallel, embedded comments, unusual
18
- field- and record-separators, flexible mapping of CSV-headers to Hash-keys
19
- email:
20
- - ! 'tilo.sloboda@gmail.com
21
16
 
22
- '
17
+ description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys
18
+ email:
19
+ - |
20
+ tilo.sloboda@gmail.com
21
+
23
22
  executables: []
23
+
24
24
  extensions: []
25
+
25
26
  extra_rdoc_files: []
26
- files:
27
+
28
+ files:
27
29
  - .gitignore
28
30
  - .rvmrc
29
31
  - Gemfile
@@ -37,28 +39,29 @@ files:
37
39
  - smarter_csv.gemspec
38
40
  homepage: https://github.com/tilo/smarter_csv
39
41
  licenses: []
42
+
43
+ metadata: {}
44
+
40
45
  post_install_message:
41
46
  rdoc_options: []
42
- require_paths:
47
+
48
+ require_paths:
43
49
  - lib
44
- required_ruby_version: !ruby/object:Gem::Requirement
45
- none: false
46
- requirements:
47
- - - ! '>='
48
- - !ruby/object:Gem::Version
49
- version: '0'
50
- required_rubygems_version: !ruby/object:Gem::Requirement
51
- none: false
52
- requirements:
53
- - - ! '>='
54
- - !ruby/object:Gem::Version
55
- version: '0'
56
- requirements:
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - &id001
53
+ - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - *id001
59
+ requirements:
57
60
  - csv
58
61
  rubyforge_project:
59
- rubygems_version: 1.8.15
62
+ rubygems_version: 2.0.3
60
63
  signing_key:
61
- specification_version: 3
62
- summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
63
- of optional features, e.g. chunked processing for huge CSV files
64
+ specification_version: 4
65
+ summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files
64
66
  test_files: []
67
+