censive 0.14 → 0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -2
- data/lib/censive.rb +48 -57
- metadata +2 -16
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 4211eb0036bcbadf9d1683f10084bb8cbb109d26845d6990778e159b634b8d00
         | 
| 4 | 
            +
              data.tar.gz: 1f97a3e5343958a86d54b7f1f87ada5d05a8f091d597253c3079c1a15eeb4dfa
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: f3b8e0ee3cf2d8eadbb26d6be5b590418a844561062779f47eec0863b87cfbc65868a8c89624865eb74ee7f6825ee3fb1d2d26611353a30f805d40d567c1f1ac
         | 
| 7 | 
            +
              data.tar.gz: 949e07f3f2208cbecc868a85c7bde5e67f38bf570ae1708496fd7745703c3fc133e8d48fc03c5c18c85264c0bff5b394d5ce2c2b2894daa8b7ca9033f9d68828
         | 
    
        data/censive.gemspec
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |s|
         | 
| 4 4 | 
             
              s.name        = "censive"
         | 
| 5 | 
            -
              s.version     = "0.14"
         | 
| 5 | 
            +
              s.version     = "0.15"
         | 
| 6 6 | 
             
              s.author      = "Steve Shreeve"
         | 
| 7 7 | 
             
              s.email       = "steve.shreeve@gmail.com"
         | 
| 8 8 | 
             
              s.summary     =
         | 
| @@ -11,5 +11,4 @@ Gem::Specification.new do |s| | |
| 11 11 | 
             
              s.license     = "MIT"
         | 
| 12 12 | 
             
              s.files       = `git ls-files`.split("\n") - %w[.gitignore]
         | 
| 13 13 | 
             
              s.executables = `(cd bin 2>&1) > /dev/null && git ls-files .`.split("\n")
         | 
| 14 | 
            -
              s.add_dependency "strscan", ">= 3.0.6"
         | 
| 15 14 | 
             
            end
         | 
    
        data/lib/censive.rb
    CHANGED
    
    | @@ -4,12 +4,12 @@ | |
| 4 4 | 
             
            # censive - A quick and lightweight CSV handling library for Ruby
         | 
| 5 5 | 
             
            #
         | 
| 6 6 | 
             
            # Author: Steve Shreeve (steve.shreeve@gmail.com)
         | 
| 7 | 
            -
            #   Date: Feb  | 
| 7 | 
            +
            #   Date: Feb 5, 2023
         | 
| 8 8 | 
             
            #
         | 
| 9 9 | 
             
            # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
         | 
| 10 10 | 
             
            # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
         | 
| 11 | 
            -
            # | 
| 12 | 
            -
            #  | 
| 11 | 
            +
            #
         | 
| 12 | 
            +
            # Thanks to Sutou Kouhei (kou) for his excellent advice on using scan
         | 
| 13 13 | 
             
            # ============================================================================
         | 
| 14 14 | 
             
            # GOALS:
         | 
| 15 15 | 
             
            # 1. Faster than Ruby's default CSV library
         | 
| @@ -19,7 +19,6 @@ | |
| 19 19 | 
             
            # TODO: Support IO streaming
         | 
| 20 20 | 
             
            # ============================================================================
         | 
| 21 21 |  | 
| 22 | 
            -
            require "bundler/setup"
         | 
| 23 22 | 
             
            require "strscan"
         | 
| 24 23 |  | 
| 25 24 | 
             
            class Censive < StringScanner
         | 
| @@ -33,41 +32,44 @@ class Censive < StringScanner | |
| 33 32 | 
             
              end
         | 
| 34 33 |  | 
| 35 34 | 
             
              def initialize(str=nil,
         | 
| 36 | 
            -
                drop: | 
| 37 | 
            -
                 | 
| 38 | 
            -
                 | 
| 39 | 
            -
                 | 
| 40 | 
            -
                 | 
| 41 | 
            -
                 | 
| 42 | 
            -
                 | 
| 43 | 
            -
                sep: | 
| 44 | 
            -
                strip: | 
| 45 | 
            -
                **opts | 
| 35 | 
            +
                drop:   false   , # drop trailing empty fields?
         | 
| 36 | 
            +
                excel:  false   , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
         | 
| 37 | 
            +
                mode:   :compact, # export mode: compact or full
         | 
| 38 | 
            +
                out:    $stdout , # output stream, needs to respond to <<
         | 
| 39 | 
            +
                quote:  '"'     , # quote character
         | 
| 40 | 
            +
                relax:  false   , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
         | 
| 41 | 
            +
                rowsep: "\n"    , # row separator for export
         | 
| 42 | 
            +
                sep:    ","     , # column separator character
         | 
| 43 | 
            +
                strip:  false   , # strip fields when reading
         | 
| 44 | 
            +
                **opts            # grab bag
         | 
| 46 45 | 
             
              )
         | 
| 47 46 | 
             
                super(str || "")
         | 
| 48 47 | 
             
                reset
         | 
| 49 48 |  | 
| 50 | 
            -
                 | 
| 51 | 
            -
                @ | 
| 52 | 
            -
                @excel | 
| 53 | 
            -
                @mode | 
| 54 | 
            -
                @out | 
| 55 | 
            -
                @quote | 
| 56 | 
            -
                @relax | 
| 57 | 
            -
                @ | 
| 58 | 
            -
                @ | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
                 | 
| 62 | 
            -
                @ | 
| 63 | 
            -
                @ | 
| 64 | 
            -
                @ | 
| 49 | 
            +
                # options
         | 
| 50 | 
            +
                @drop    = drop
         | 
| 51 | 
            +
                @excel   = excel
         | 
| 52 | 
            +
                @mode    = mode
         | 
| 53 | 
            +
                @out     = out
         | 
| 54 | 
            +
                @quote   = quote
         | 
| 55 | 
            +
                @relax   = relax
         | 
| 56 | 
            +
                @rowsep  = rowsep
         | 
| 57 | 
            +
                @sep     = sep
         | 
| 58 | 
            +
                @strip   = strip
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                # determined
         | 
| 61 | 
            +
                @cr  = "\r"
         | 
| 62 | 
            +
                @lf  = "\n"
         | 
| 63 | 
            +
                @es  = ""
         | 
| 64 | 
            +
                @eq  = "="
         | 
| 65 | 
            +
                @esc = (@quote * 2)
         | 
| 66 | 
            +
                @eol = /#{@cr}#{@lf}?|#{@lf}|\z/o             # end of line
         | 
| 67 | 
            +
                @eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
         | 
| 65 68 | 
             
              end
         | 
| 66 69 |  | 
| 67 70 | 
             
              def reset(str=nil)
         | 
| 68 71 | 
             
                self.string = str if str
         | 
| 69 72 | 
             
                super()
         | 
| 70 | 
            -
                @char = currchar
         | 
| 71 73 | 
             
                @rows = nil
         | 
| 72 74 | 
             
                @cols = @cells = 0
         | 
| 73 75 | 
             
              end
         | 
| @@ -75,36 +77,26 @@ class Censive < StringScanner | |
| 75 77 | 
             
              # ==[ Lexer ]==
         | 
| 76 78 |  | 
| 77 79 | 
             
              def next_token
         | 
| 78 | 
            -
                if @excel && @ | 
| 79 | 
            -
                  excel = true
         | 
| 80 | 
            -
                  @char = nextchar
         | 
| 81 | 
            -
                end
         | 
| 80 | 
            +
                excel = true if @excel && scan(@eq)
         | 
| 82 81 |  | 
| 83 | 
            -
                if @ | 
| 82 | 
            +
                if scan(@quote) # consume quoted cell
         | 
| 84 83 | 
             
                  token = ""
         | 
| 85 84 | 
             
                  while true
         | 
| 86 | 
            -
                    @ | 
| 87 | 
            -
                    token <<  | 
| 88 | 
            -
                     | 
| 89 | 
            -
                    break if [@sep,@cr,@lf,@es,nil].include?(@char)
         | 
| 85 | 
            +
                    token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
         | 
| 86 | 
            +
                    token << @quote and next if scan(@quote)
         | 
| 87 | 
            +
                    break if scan(@eoc)
         | 
| 90 88 | 
             
                    @relax or bomb "invalid character after quote"
         | 
| 91 | 
            -
                     | 
| 92 | 
            -
             | 
| 93 | 
            -
                  @char = nextchar if @char == @sep
         | 
| 94 | 
            -
                  @strip ? token.strip : token
         | 
| 95 | 
            -
                elsif [@sep,@cr,@lf,@es,nil].include?(@char)
         | 
| 96 | 
            -
                  case @char
         | 
| 97 | 
            -
                  when @sep then  @char = nextchar                             ; @es
         | 
| 98 | 
            -
                  when @cr  then (@char = nextchar) == @lf and @char = nextchar; nil
         | 
| 99 | 
            -
                  when @lf  then  @char = nextchar                             ; nil
         | 
| 100 | 
            -
                  else                                                           nil
         | 
| 89 | 
            +
                    quoted = scan_until(/#{@quote}/o) or bomb "invalid inline quote"
         | 
| 90 | 
            +
                    token << @quote + quoted + @quote
         | 
| 101 91 | 
             
                  end
         | 
| 92 | 
            +
                elsif scan(@sep) then return @es
         | 
| 93 | 
            +
                elsif scan(@eol) then return nil
         | 
| 102 94 | 
             
                else # consume unquoted cell
         | 
| 103 | 
            -
                  token = scan_until( | 
| 95 | 
            +
                  token = scan_until(@eoc) or bomb "unexpected character"
         | 
| 104 96 | 
             
                  token.prepend(@eq) if excel
         | 
| 105 | 
            -
                  @char = nextchar if (@char = currchar) == @sep
         | 
| 106 | 
            -
                  @strip ? token.strip : token
         | 
| 107 97 | 
             
                end
         | 
| 98 | 
            +
                scan(@sep)
         | 
| 99 | 
            +
                @strip ? token.strip : token
         | 
| 108 100 | 
             
              end
         | 
| 109 101 |  | 
| 110 102 | 
             
              def bomb(msg)
         | 
| @@ -178,7 +170,7 @@ class Censive < StringScanner | |
| 178 170 | 
             
                  end
         | 
| 179 171 | 
             
                end.join(s)
         | 
| 180 172 |  | 
| 181 | 
            -
                @out << out + @ | 
| 173 | 
            +
                @out << out + @rowsep
         | 
| 182 174 | 
             
              end
         | 
| 183 175 |  | 
| 184 176 | 
             
              def each
         | 
| @@ -201,11 +193,10 @@ class Censive < StringScanner | |
| 201 193 | 
             
            end
         | 
| 202 194 |  | 
| 203 195 | 
             
            if __FILE__ == $0
         | 
| 204 | 
            -
              raw = DATA. | 
| 205 | 
            -
            # raw = DATA.gets("\n\n").chomp
         | 
| 196 | 
            +
              raw = DATA.gets("\n\n").chomp
         | 
| 206 197 | 
             
            # raw = File.read(ARGV.first || "lc-2023.csv")
         | 
| 207 | 
            -
              csv = Censive.new(raw, excel: true, relax: true | 
| 208 | 
            -
              csv.export(sep: " | 
| 198 | 
            +
              csv = Censive.new(raw, excel: true, relax: true)
         | 
| 199 | 
            +
              csv.export # (sep: ",", excel: true)
         | 
| 209 200 | 
             
            end
         | 
| 210 201 |  | 
| 211 202 | 
             
            __END__
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: censive
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: '0.14'
         | 
| 4 | 
            +
              version: '0.15'
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Steve Shreeve
         | 
| @@ -9,21 +9,7 @@ autorequire: | |
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 11 | 
             
            date: 2023-02-05 00:00:00.000000000 Z
         | 
| 12 | 
            -
            dependencies:
         | 
| 13 | 
            -
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            -
              name: strscan
         | 
| 15 | 
            -
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            -
                requirements:
         | 
| 17 | 
            -
                - - ">="
         | 
| 18 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            -
                    version: 3.0.6
         | 
| 20 | 
            -
              type: :runtime
         | 
| 21 | 
            -
              prerelease: false
         | 
| 22 | 
            -
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            -
                requirements:
         | 
| 24 | 
            -
                - - ">="
         | 
| 25 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            -
                    version: 3.0.6
         | 
| 12 | 
            +
            dependencies: []
         | 
| 27 13 | 
             
            description: A quick and lightweight CSV handling library for Ruby
         | 
| 28 14 | 
             
            email: steve.shreeve@gmail.com
         | 
| 29 15 | 
             
            executables: []
         |