smarter_csv 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +44 -24
- data/lib/smarter_csv/smarter_csv.rb +10 -5
- data/lib/smarter_csv/version.rb +1 -1
- metadata +37 -33
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            --- 
         | 
| 2 | 
            +
            SHA1: 
         | 
| 3 | 
            +
              metadata.gz: ed29542688f14930b2c88be160a092ba0e7e0398
         | 
| 4 | 
            +
              data.tar.gz: 6ce2d29c27af35540f83a44f7c1af1a1f3da988e
         | 
| 5 | 
            +
            SHA512: 
         | 
| 6 | 
            +
              metadata.gz: a88bbcfc69a77beb3ef9fbcd68d48d17880e57264f4755426d0f44c64d06905ba496de9805f48cb2c83373da2ba0bb893dd71a55556fed61016b6696a4a35d95
         | 
| 7 | 
            +
              data.tar.gz: 73de0af3b952a8dc4a32f1c823162f718aeedd5304f553b4690100a245c81d68edda0280e3646e965e5c3d260176a884ea64f6a43adb3c62ae26e4bcef2bbfc7
         | 
    
        data/README.md
    CHANGED
    
    | @@ -126,30 +126,31 @@ The options and the block are optional. | |
| 126 126 |  | 
| 127 127 | 
             
            `SmarterCSV.process` supports the following options:
         | 
| 128 128 |  | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 131 | 
            -
                 :col_sep                    |   ','    | column separator
         | 
| 132 | 
            -
                 :row_sep                    | $/ ,"\n" | row separator or record separator , defaults to system's $/ , which defaults to "\n"
         | 
| 133 | 
            -
                 :quote_char                 |   '"'    | quotation character
         | 
| 134 | 
            -
                 :comment_regexp             |   /^#/   | regular expression which matches comment lines (see NOTE about the CSV header)
         | 
| 135 | 
            -
                 :chunk_size                 |   nil    | if set, determines the desired chunk-size (defaults to nil, no chunk processing)
         | 
| 136 | 
            -
                 :key_mapping                |   nil    | a hash which maps headers from the CSV file to keys in the result hash
         | 
| 137 | 
            -
                 :downcase_header            |   true   | downcase all column headers
         | 
| 138 | 
            -
                 :strings_as_keys            |   false  | use strings instead of symbols as the keys in the result hashes
         | 
| 139 | 
            -
                 :strip_whitespace           |   true   | remove whitespace before/after values and headers
         | 
| 140 | 
            -
                 :remove_empty_values        |   true   | remove values which have nil or empty strings as values
         | 
| 141 | 
            -
                 :remove_zero_values         |   true   | remove values which have a numeric value equal to zero / 0
         | 
| 142 | 
            -
                 :remove_values_matching     |   nil    | removes key/value pairs if value matches given regular expressions. e.g.:
         | 
| 143 | 
            -
             | 
| 144 | 
            -
                 :convert_values_to_numeric  |   true   | converts strings containing Integers or Floats to the appropriate class
         | 
| 145 | 
            -
                 :remove_empty_hashes        |   true   | remove / ignore any hashes which don't have any key/value pairs
         | 
| 146 | 
            -
                 :user_provided_headers      |   nil    | *careful with that axe!*
         | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
                 : | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 129 | 
            +
                 | Option                      | Default  |  Explanation                                                                         |
         | 
| 130 | 
            +
                 ---------------------------------------------------------------------------------------------------------------------------------
         | 
| 131 | 
            +
                 | :col_sep                    |   ','    | column separator                                                                     |
         | 
| 132 | 
            +
                 | :row_sep                    | $/ ,"\n" | row separator or record separator , defaults to system's $/ , which defaults to "\n" |
         | 
| 133 | 
            +
                 | :quote_char                 |   '"'    | quotation character                                                                  |
         | 
| 134 | 
            +
                 | :comment_regexp             |   /^#/   | regular expression which matches comment lines (see NOTE about the CSV header)       |
         | 
| 135 | 
            +
                 | :chunk_size                 |   nil    | if set, determines the desired chunk-size (defaults to nil, no chunk processing)     |
         | 
| 136 | 
            +
                 | :key_mapping                |   nil    | a hash which maps headers from the CSV file to keys in the result hash               |
         | 
| 137 | 
            +
                 | :downcase_header            |   true   | downcase all column headers                                                          |
         | 
| 138 | 
            +
                 | :strings_as_keys            |   false  | use strings instead of symbols as the keys in the result hashes                      |
         | 
| 139 | 
            +
                 | :strip_whitespace           |   true   | remove whitespace before/after values and headers                                    |
         | 
| 140 | 
            +
                 | :remove_empty_values        |   true   | remove values which have nil or empty strings as values                              |
         | 
| 141 | 
            +
                 | :remove_zero_values         |   true   | remove values which have a numeric value equal to zero / 0                           |
         | 
| 142 | 
            +
                 | :remove_values_matching     |   nil    | removes key/value pairs if value matches given regular expressions. e.g.:            |
         | 
| 143 | 
            +
                 |                             |          | /^\$0\.0+$/ to match $0.00 , or /^#VALUE!$/ to match errors in Excel spreadsheets    |
         | 
| 144 | 
            +
                 | :convert_values_to_numeric  |   true   | converts strings containing Integers or Floats to the appropriate class              |
         | 
| 145 | 
            +
                 | :remove_empty_hashes        |   true   | remove / ignore any hashes which don't have any key/value pairs                      |
         | 
| 146 | 
            +
                 | :user_provided_headers      |   nil    | *careful with that axe!*                                                             |
         | 
| 147 | 
            +
                 |                             |          | user provided Array of header strings or symbols, to define                          |
         | 
| 148 | 
            +
                 |                             |          | what headers should be used, overriding any in-file headers.                         |
         | 
| 149 | 
            +
                 |                             |          | You cannot combine the :user_provided_headers and :key_mapping options               |
         | 
| 150 | 
            +
                 | :strip_chars_from_headers   |   nil    | remove extraneous characters from the header line (e.g. if the headers are quoted)   |
         | 
| 151 | 
            +
                 | :headers_in_file            |   true   | Whether or not the file contains headers as the first line.                          |
         | 
| 152 | 
            +
                 |                             |          | Important if the file does not contain headers,                                      |
         | 
| 153 | 
            +
                 |                             |          | otherwise you would lose the first line of data.                                     |
         | 
| 153 154 |  | 
| 154 155 |  | 
| 155 156 | 
             
            #### NOTES about CSV Headers:
         | 
| @@ -193,9 +194,17 @@ Or install it yourself as: | |
| 193 194 | 
             
                $ gem install smarter_csv
         | 
| 194 195 |  | 
| 195 196 |  | 
| 197 | 
            +
            ## Known Bugs
         | 
| 198 | 
            +
             | 
| 199 | 
            +
             * if :col_sep (e.g. a comma) appears inside a quoted field, smarter_csv <= 1.0.4 incorrectly splits on that :col_sep
         | 
| 200 | 
            +
             | 
| 196 201 |  | 
| 197 202 | 
             
            ## Changes
         | 
| 198 203 |  | 
| 204 | 
            +
            #### 1.0.5 (2013-05-08)
         | 
| 205 | 
            +
             | 
| 206 | 
            +
             * bugfix : for :headers_in_file option
         | 
| 207 | 
            +
             | 
| 199 208 | 
             
            #### 1.0.4 (2012-08-17)
         | 
| 200 209 |  | 
| 201 210 | 
             
             * renamed the following options: 
         | 
| @@ -238,6 +247,17 @@ Or install it yourself as: | |
| 238 247 | 
             
            Please [open an Issue on GitHub](https://github.com/tilo/smarter_csv/issues) if you have feedback, new feature requests, or want to report a bug. Thank you!
         | 
| 239 248 |  | 
| 240 249 |  | 
| 250 | 
            +
            ## Special Thanks
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            Many thanks to people who have filed issues and sent comments. 
         | 
| 253 | 
            +
            And a special thanks to those who contributed pull requests:
         | 
| 254 | 
            +
             | 
| 255 | 
            +
             * [Sean Duckett](http://github.com/sduckett)
         | 
| 256 | 
            +
             * [Alex Ong](http://github.com/khaong) 
         | 
| 257 | 
            +
             * [Martin Nilsson](http://github.com/MrTin) 
         | 
| 258 | 
            +
             * [Eustáquio Rangel](http://github.com/taq) 
         | 
| 259 | 
            +
             * [Pavel](http://github.com/paxa) 
         | 
| 260 | 
            +
             | 
| 241 261 |  | 
| 242 262 | 
             
            ## Contributing
         | 
| 243 263 |  | 
| @@ -31,7 +31,7 @@ module SmarterCSV | |
| 31 31 | 
             
                  if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
         | 
| 32 32 | 
             
                    # use user-provided headers 
         | 
| 33 33 | 
             
                    headerA = options[:user_provided_headers]
         | 
| 34 | 
            -
                    if defined?(file_header_size)
         | 
| 34 | 
            +
                    if defined?(file_header_size) && ! file_header_size.nil?
         | 
| 35 35 | 
             
                      if headerA.size != file_header_size
         | 
| 36 36 | 
             
                        raise SmarterCSV::HeaderSizeMismatch , "ERROR [smarter_csv]: :user_provided_headers defines #{headerA.size} headers !=  CSV-file #{filename} has #{file_header_size} headers" 
         | 
| 37 37 | 
             
                      else
         | 
| @@ -62,18 +62,23 @@ module SmarterCSV | |
| 62 62 | 
             
                  else
         | 
| 63 63 | 
             
                    use_chunks = false
         | 
| 64 64 | 
             
                  end
         | 
| 65 | 
            -
             | 
| 65 | 
            +
             | 
| 66 66 | 
             
                  # now on to processing all the rest of the lines in the CSV file:
         | 
| 67 67 | 
             
                  while ! f.eof?    # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
         | 
| 68 68 | 
             
                    line = f.readline  # read one line.. this uses the input_record_separator $/ which we set previously!
         | 
| 69 69 | 
             
                    next  if  line =~ options[:comment_regexp]  # ignore all comment lines if there are any
         | 
| 70 70 | 
             
                    line.chomp!    # will use $/ which is set to options[:row_sep]
         | 
| 71 | 
            -
             | 
| 72 | 
            -
                    dataA = line.split(options[:col_sep])
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                    dataA = line.split(options[:col_sep])   # ISSUE 4 : BUG : this splits incorrectly if , is inside quoted fields
         | 
| 73 73 | 
             
                    dataA.map!{|x| x.strip}  if options[:strip_whitespace]
         | 
| 74 74 | 
             
                    hash = Hash.zip(headerA,dataA)  # from Facets of Ruby library
         | 
| 75 75 | 
             
                    # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
         | 
| 76 | 
            -
                     | 
| 76 | 
            +
                    # Note: Ruby < 1.9 doesn't allow empty symbol literals!
         | 
| 77 | 
            +
                    hash.delete(nil); hash.delete('');
         | 
| 78 | 
            +
                    if RUBY_VERSION.to_f > 1.8
         | 
| 79 | 
            +
                      eval('hash.delete(:"")')
         | 
| 80 | 
            +
                    end
         | 
| 81 | 
            +
             | 
| 77 82 | 
             
                    hash.delete_if{|k,v| v.nil? || v =~ /^\s*$/}  if options[:remove_empty_values]
         | 
| 78 83 | 
             
                    hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values]   # values are typically Strings!
         | 
| 79 84 | 
             
                    hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
         | 
    
        data/lib/smarter_csv/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,29 +1,31 @@ | |
| 1 | 
            -
            --- !ruby/object:Gem::Specification
         | 
| 1 | 
            +
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: smarter_csv
         | 
| 3 | 
            -
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1.0. | 
| 5 | 
            -
              prerelease: 
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            +
              version: 1.0.5
         | 
| 6 5 | 
             
            platform: ruby
         | 
| 7 | 
            -
            authors:
         | 
| 8 | 
            -
            -  | 
| 6 | 
            +
            authors: 
         | 
| 7 | 
            +
            - |
         | 
| 8 | 
            +
              Tilo Sloboda
         | 
| 9 9 |  | 
| 10 | 
            -
            '
         | 
| 11 10 | 
             
            autorequire: 
         | 
| 12 11 | 
             
            bindir: bin
         | 
| 13 12 | 
             
            cert_chain: []
         | 
| 14 | 
            -
             | 
| 13 | 
            +
             | 
| 14 | 
            +
            date: 2013-05-09 00:00:00 Z
         | 
| 15 15 | 
             
            dependencies: []
         | 
| 16 | 
            -
            description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
         | 
| 17 | 
            -
              optional features for processing large files in parallel, embedded comments, unusual
         | 
| 18 | 
            -
              field- and record-separators, flexible mapping of CSV-headers to Hash-keys
         | 
| 19 | 
            -
            email:
         | 
| 20 | 
            -
            - ! 'tilo.sloboda@gmail.com
         | 
| 21 16 |  | 
| 22 | 
            -
             | 
| 17 | 
            +
            description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys
         | 
| 18 | 
            +
            email: 
         | 
| 19 | 
            +
            - |
         | 
| 20 | 
            +
              tilo.sloboda@gmail.com
         | 
| 21 | 
            +
             | 
| 23 22 | 
             
            executables: []
         | 
| 23 | 
            +
             | 
| 24 24 | 
             
            extensions: []
         | 
| 25 | 
            +
             | 
| 25 26 | 
             
            extra_rdoc_files: []
         | 
| 26 | 
            -
             | 
| 27 | 
            +
             | 
| 28 | 
            +
            files: 
         | 
| 27 29 | 
             
            - .gitignore
         | 
| 28 30 | 
             
            - .rvmrc
         | 
| 29 31 | 
             
            - Gemfile
         | 
| @@ -35,29 +37,31 @@ files: | |
| 35 37 | 
             
            - lib/smarter_csv/smarter_csv.rb
         | 
| 36 38 | 
             
            - lib/smarter_csv/version.rb
         | 
| 37 39 | 
             
            - smarter_csv.gemspec
         | 
| 38 | 
            -
            homepage:  | 
| 40 | 
            +
            homepage: ""
         | 
| 39 41 | 
             
            licenses: []
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            metadata: {}
         | 
| 44 | 
            +
             | 
| 40 45 | 
             
            post_install_message: 
         | 
| 41 46 | 
             
            rdoc_options: []
         | 
| 42 | 
            -
             | 
| 47 | 
            +
             | 
| 48 | 
            +
            require_paths: 
         | 
| 43 49 | 
             
            - lib
         | 
| 44 | 
            -
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 45 | 
            -
               | 
| 46 | 
            -
               | 
| 47 | 
            -
             | 
| 48 | 
            -
                - !ruby/object:Gem::Version
         | 
| 49 | 
            -
                  version:  | 
| 50 | 
            -
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 51 | 
            -
               | 
| 52 | 
            -
               | 
| 53 | 
            -
              - - ! '>='
         | 
| 54 | 
            -
                - !ruby/object:Gem::Version
         | 
| 55 | 
            -
                  version: '0'
         | 
| 50 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 51 | 
            +
              requirements: 
         | 
| 52 | 
            +
              - &id001 
         | 
| 53 | 
            +
                - ">="
         | 
| 54 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 55 | 
            +
                  version: "0"
         | 
| 56 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 57 | 
            +
              requirements: 
         | 
| 58 | 
            +
              - *id001
         | 
| 56 59 | 
             
            requirements: []
         | 
| 60 | 
            +
             | 
| 57 61 | 
             
            rubyforge_project: 
         | 
| 58 | 
            -
            rubygems_version:  | 
| 62 | 
            +
            rubygems_version: 2.0.3
         | 
| 59 63 | 
             
            signing_key: 
         | 
| 60 | 
            -
            specification_version:  | 
| 61 | 
            -
            summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
         | 
| 62 | 
            -
              of optional features, e.g. chunked processing for huge CSV files
         | 
| 64 | 
            +
            specification_version: 4
         | 
| 65 | 
            +
            summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files
         | 
| 63 66 | 
             
            test_files: []
         | 
| 67 | 
            +
             |