RubyGems - rex-text - Versions diffs - 0.1.0 - Mend

rex-text 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +7 -0
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +1 -0
data/.gitignore +9 -0
data/.rspec +2 -0
data/.travis.yml +5 -0
data/CODE_OF_CONDUCT.md +52 -0
data/Gemfile +4 -0
data/LICENSE +27 -0
data/README.md +32 -0
data/Rakefile +6 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/lib/rex/codepage.map +104 -0
data/lib/rex/text.rb +195 -0
data/lib/rex/text/badchars.rb +50 -0
data/lib/rex/text/base32.rb +87 -0
data/lib/rex/text/base64.rb +42 -0
data/lib/rex/text/binary_manipulation.rb +117 -0
data/lib/rex/text/block_api.rb +33 -0
data/lib/rex/text/checksum.rb +39 -0
data/lib/rex/text/compress.rb +105 -0
data/lib/rex/text/ebcdic.rb +219 -0
data/lib/rex/text/encode.rb +104 -0
data/lib/rex/text/hash.rb +37 -0
data/lib/rex/text/hex.rb +204 -0
data/lib/rex/text/illegal_sequence.rb +6 -0
data/lib/rex/text/lang.rb +156 -0
data/lib/rex/text/pattern.rb +91 -0
data/lib/rex/text/rand.rb +233 -0
data/lib/rex/text/randomize.rb +125 -0
data/lib/rex/text/silly.rb +59 -0
data/lib/rex/text/unicode.rb +276 -0
data/lib/rex/text/version.rb +5 -0
data/rex-text.gemspec +24 -0
metadata +195 -0
metadata.gz.sig +3 -0

data/lib/rex/text/randomize.rb ADDED Viewed

@@ -0,0 +1,125 @@
+# -*- coding: binary -*-
+module Rex
+  module Text
+    # We are re-opening the module to add these module methods.
+    # Breaking them up this way allows us to maintain a little higher
+    # degree of organisation and make it easier to find what you're looking for
+    # without changing the underlying calls that we historically rely upon.
+    #
+    # Converts a string to random case
+    #
+    # @example
+    #   Rex::Text.to_rand_case("asdf") # => "asDf"
+    #
+    # @param str [String] The string to randomize
+    # @return [String]
+    # @see permute_case
+    # @see to_mixed_case_array
+    def self.to_rand_case(str)
+      buf = str.dup
+      0.upto(str.length) do |i|
+        buf[i,1] = rand(2) == 0 ? str[i,1].upcase : str[i,1].downcase
+      end
+      return buf
+    end
+    #
+    # Takes a string, and returns an array of all mixed case versions.
+    #
+    # @example
+    #   >> Rex::Text.to_mixed_case_array "abc1"
+    #   => ["abc1", "abC1", "aBc1", "aBC1", "Abc1", "AbC1", "ABc1", "ABC1"]
+    #
+    # @param str [String] The string to randomize
+    # @return [Array<String>]
+    # @see permute_case
+    def self.to_mixed_case_array(str)
+      letters = []
+      str.scan(/./).each { |l| letters << [l.downcase, l.upcase] }
+      coords = []
+      (1 << str.size).times { |i| coords << ("%0#{str.size}b" % i) }
+      mixed = []
+      coords.each do |coord|
+        c = coord.scan(/./).map {|x| x.to_i}
+        this_str = ""
+        c.each_with_index { |d,i| this_str << letters[i][d] }
+        mixed << this_str
+      end
+      return mixed.uniq
+    end
+    #
+    # Randomize the whitespace in a string
+    #
+    def self.randomize_space(str)
+      set = ["\x09", "\x20", "\x0d", "\x0a"]
+      str.gsub(/\s+/) { |s|
+        len = rand(50)+2
+        buf = ''
+        while (buf.length < len)
+          buf << set.sample
+        end
+        buf
+      }
+    end
+    #
+    # Shuffles a byte stream
+    #
+    # @param str [String]
+    # @return [String] The shuffled result
+    # @see shuffle_a
+    def self.shuffle_s(str)
+      shuffle_a(str.unpack("C*")).pack("C*")
+    end
+    #
+    # Performs a Fisher-Yates shuffle on an array
+    #
+    # Modifies +arr+ in place
+    #
+    # @param arr [Array] The array to be shuffled
+    # @return [Array]
+    def self.shuffle_a(arr)
+      len = arr.length
+      max = len - 1
+      cyc = [* (0..max) ]
+      for d in cyc
+        e = rand(d+1)
+        next if e == d
+        f = arr[d];
+        g = arr[e];
+        arr[d] = g;
+        arr[e] = f;
+      end
+      return arr
+    end
+    # Permute the case of a word
+    def self.permute_case(word, idx=0)
+      res = []
+      if( (UpperAlpha+LowerAlpha).index(word[idx,1]))
+        word_ucase = word.dup
+        word_ucase[idx, 1] = word[idx, 1].upcase
+        word_lcase = word.dup
+        word_lcase[idx, 1] = word[idx, 1].downcase
+        if (idx == word.length)
+          return [word]
+        else
+          res << permute_case(word_ucase, idx+1)
+          res << permute_case(word_lcase, idx+1)
+        end
+      else
+        res << permute_case(word, idx+1)
+      end
+      res.flatten
+    end
+  end
+end

data/lib/rex/text/silly.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# -*- coding: binary -*-
+module Rex
+  module Text
+    # We are re-opening the module to add these module methods.
+    # Breaking them up this way allows us to maintain a little higher
+    # degree of organisation and make it easier to find what you're looking for
+    # without changing the underlying calls that we historically rely upon.
+    #
+    # Converts a string to one similar to what would be used by cowsay(1), a UNIX utility for
+    # displaying text as if it was coming from an ASCII-cow's mouth:
+    #
+    #       __________________
+    #      < the cow says moo >
+    #       ------------------
+    #              \   ^__^
+    #               \  (oo)\_______
+    #                  (__)\       )\/\
+    #                      ||----w |
+    #                      ||     ||
+    #
+    # @param text [String] The string to cowsay
+    # @param width [Fixnum] Width of the cow's cloud.  Defaults to cowsay(1)'s default, 39.
+    def self.cowsay(text, width=39)
+      # cowsay(1) chunks a message up into 39-byte chunks and wraps it in '| ' and ' |'
+      # Rex::Text.wordwrap(text, 0, 39, ' |', '| ') almost does this, but won't
+      # split a word that has > 39 characters in it which results in oddly formed
+      # text in the cowsay banner, so just do it by hand.  This big mess wraps
+      # the provided text in an ASCII-cloud and then makes it look like the cloud
+      # is a thought/word coming from the ASCII-cow.  Each line in the
+      # ASCII-cloud is no more than the specified number-characters long, and the
+      # cloud corners are made to look rounded
+      text_lines = text.scan(Regexp.new(".{1,#{width-4}}"))
+      max_length = text_lines.map(&:size).sort.last
+      cloud_parts = []
+      cloud_parts << " #{'_' * (max_length + 2)}"
+      if text_lines.size == 1
+        cloud_parts << "< #{text} >"
+      else
+        cloud_parts << "/ #{text_lines.first.ljust(max_length, ' ')} \\"
+        if text_lines.size > 2
+          text_lines[1, text_lines.length - 2].each do |line|
+            cloud_parts << "| #{line.ljust(max_length, ' ')} |"
+          end
+        end
+        cloud_parts << "\\ #{text_lines.last.ljust(max_length, ' ')} /"
+      end
+      cloud_parts << " #{'-' * (max_length + 2)}"
+      cloud_parts << <<EOS
+       \\   ,__,
+        \\  (oo)____
+           (__)    )\\
+              ||--|| *
+EOS
+      cloud_parts.join("\n")
+    end
+  end
+end

data/lib/rex/text/unicode.rb ADDED Viewed

@@ -0,0 +1,276 @@
+# -*- coding: binary -*-
+module Rex
+  module Text
+    # We are re-opening the module to add these module methods.
+    # Breaking them up this way allows us to maintain a little higher
+    # degree of organisation and make it easier to find what you're looking for
+    # without changing the underlying calls that we historically rely upon.
+    #
+    # Converts standard ASCII text to a unicode string.
+    #
+    # Supported unicode types include: utf-16le, utf-16be, utf-32le,
+    # utf-32be, utf-7, and utf-8
+    #
+    # Providing 'mode' provides hints to the actual encoder as to how it
+    # should encode the string.
+    #
+    # Only UTF-7 and UTF-8 use "mode".
+    #
+    # utf-7 by default does not encode alphanumeric and a few other
+    # characters.  By specifying the mode of "all", then all of the
+    # characters are encoded, not just the non-alphanumeric set.
+    # to_unicode(str, 'utf-7', 'all')
+    #
+    # utf-8 specifies that alphanumeric characters are used directly, eg
+    # "a" is just "a".  However, there exist 6 different overlong
+    # encodings of "a" that are technically not valid, but parse just fine
+    # in most utf-8 parsers.  (0xC1A1, 0xE081A1, 0xF08081A1, 0xF8808081A1,
+    # 0xFC80808081A1, 0xFE8080808081A1).  How many bytes to use for the
+    # overlong encoding is specified by providing 'size'.  to_unicode(str,
+    # 'utf-8', 'overlong', 2)
+    #
+    # Many utf-8 parsers also allow invalid overlong encodings, where bits
+    # that are unused when encoding a single byte are modified.  Many
+    # parsers will ignore these bits, rendering simple string matching to
+    # be ineffective for dealing with UTF-8 strings.  There are many more
+    # invalid overlong encodings possible for "a".  For example, three
+    # encodings are available for an invalid 2 byte encoding of "a".
+    # (0xC1E1 0xC161 0xC121).
+    #
+    # By specifying "invalid", a random invalid encoding is chosen for the
+    # given byte size.  to_unicode(str, 'utf-8', 'invalid', 2)
+    #
+    # utf-7 defaults to 'normal' utf-7 encoding; utf-8 defaults to 2 byte
+    # 'normal' encoding.
+    def self.to_unicode(str='', type = 'utf-16le', mode = '', size = '')
+      return '' if not str
+      case type
+        when 'utf-16le'
+          return str.unpack('C*').pack('v*')
+        when 'utf-16be'
+          return str.unpack('C*').pack('n*')
+        when 'utf-32le'
+          return str.unpack('C*').pack('V*')
+        when 'utf-32be'
+          return str.unpack('C*').pack('N*')
+        when 'utf-7'
+          case mode
+            when 'all'
+              return str.gsub(/./){ |a|
+                out = ''
+                if 'a' != '+'
+                  out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
+                end
+                '+' + out + '-'
+              }
+            else
+              return str.gsub(/[^\n\r\t\ A-Za-z0-9\'\(\),-.\/\:\?]/){ |a|
+                out = ''
+                if a != '+'
+                  out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
+                end
+                '+' + out + '-'
+              }
+          end
+        when 'utf-8'
+          if size == ''
+            size = 2
+          end
+          if size >= 2 and size <= 7
+            string = ''
+            str.each_byte { |a|
+              if (a < 21 || a > 0x7f) || mode != ''
+                # ugh.	turn a single byte into the binary representation of it, in array form
+                bin = [a].pack('C').unpack('B8')[0].split(//)
+                # even more ugh.
+                bin.collect!{|a_| a_.to_i}
+                out = Array.new(8 * size, 0)
+                0.upto(size - 1) { |i|
+                  out[i] = 1
+                  out[i * 8] = 1
+                }
+                i = 0
+                byte = 0
+                bin.reverse.each { |bit|
+                  if i < 6
+                    mod = (((size * 8) - 1) - byte * 8) - i
+                    out[mod] = bit
+                  else
+                    byte = byte + 1
+                    i = 0
+                    redo
+                  end
+                  i = i + 1
+                }
+                if mode != ''
+                  case mode
+                    when 'overlong'
+                      # do nothing, since we already handle this as above...
+                    when 'invalid'
+                      done = 0
+                      while done == 0
+                        # the ghetto...
+                        bits = [7, 8, 15, 16, 23, 24, 31, 32, 41]
+                        bits.each { |bit|
+                          bit = (size * 8) - bit
+                          if bit > 1
+                            set = rand(2)
+                            if out[bit] != set
+                              out[bit] = set
+                              done = 1
+                            end
+                          end
+                        }
+                      end
+                    else
+                      raise TypeError, 'Invalid mode.  Only "overlong" and "invalid" are acceptable modes for utf-8'
+                  end
+                end
+                string << [out.join('')].pack('B*')
+              else
+                string << [a].pack('C')
+              end
+            }
+            return string
+          else
+            raise TypeError, 'invalid utf-8 size'
+          end
+        when 'uhwtfms' # suggested name from HD :P
+          load_codepage()
+          string = ''
+          # overloading mode as codepage
+          if mode == ''
+            mode = 1252 # ANSI - Latan 1, default for US installs of MS products
+          else
+            mode = mode.to_i
+          end
+          if @@codepage_map_cache[mode].nil?
+            raise TypeError, "Invalid codepage #{mode}"
+          end
+          str.each_byte {|byte|
+            char = [byte].pack('C*')
+            possible = @@codepage_map_cache[mode]['data'][char]
+            if possible.nil?
+              raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
+            end
+            string << possible[ rand(possible.length) ]
+          }
+          return string
+        when 'uhwtfms-half' # suggested name from HD :P
+          load_codepage()
+          string = ''
+          # overloading mode as codepage
+          if mode == ''
+            mode = 1252 # ANSI - Latan 1, default for US installs of MS products
+          else
+            mode = mode.to_i
+          end
+          if mode != 1252
+            raise TypeError, "Invalid codepage #{mode}, only 1252 supported for uhwtfms_half"
+          end
+          str.each_byte {|byte|
+            if ((byte >= 33 && byte <= 63) || (byte >= 96 && byte <= 126))
+              string << "\xFF" + [byte ^ 32].pack('C')
+            elsif (byte >= 64 && byte <= 95)
+              string << "\xFF" + [byte ^ 96].pack('C')
+            else
+              char = [byte].pack('C')
+              possible = @@codepage_map_cache[mode]['data'][char]
+              if possible.nil?
+                raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
+              end
+              string << possible[ rand(possible.length) ]
+            end
+          }
+          return string
+        else
+          raise TypeError, 'invalid utf type'
+      end
+    end
+    #
+    # A custom unicode filter for dealing with multi-byte strings on a 8-bit console
+    # Punycode would have been more "standard", but it requires valid Unicode chars
+    #
+    def self.unicode_filter_encode(str)
+      if (str.to_s.unpack("C*") & ( LowAscii + HighAscii + "\x7f" ).unpack("C*")).length > 0
+        str = "$U$" + str.unpack("C*").select{|c| c < 0x7f and c > 0x1f and c != 0x2d}.pack("C*") + "-0x" + str.unpack("H*")[0]
+      else
+        str
+      end
+    end
+    def self.unicode_filter_decode(str)
+      str.to_s.gsub( /\$U\$([\x20-\x2c\x2e-\x7E]*)\-0x([A-Fa-f0-9]+)/n ){|m| [$2].pack("H*") }
+    end
+    # Converts US-ASCII to UTF-8, skipping over any characters which don't
+    # convert cleanly. This is a convenience method that wraps
+    # String#encode with non-raising default parameters.
+    #
+    # @param str [String] An encodable ASCII string
+    # @return [String] a UTF-8 equivalent
+    # @note This method will discard invalid characters
+    def self.to_utf8(str)
+      str.encode('utf-8', { :invalid => :replace, :undef => :replace, :replace => '' })
+    end
+    #
+    # Returns a unicode escaped string for Javascript
+    #
+    def self.to_unescape(data, endian=ENDIAN_LITTLE, prefix='%%u')
+      data << "\x41" if (data.length % 2 != 0)
+      dptr = 0
+      buff = ''
+      while (dptr < data.length)
+        c1 = data[dptr,1].unpack("C*")[0]
+        dptr += 1
+        c2 = data[dptr,1].unpack("C*")[0]
+        dptr += 1
+        if (endian == ENDIAN_LITTLE)
+          buff << sprintf("#{prefix}%.2x%.2x", c2, c1)
+        else
+          buff << sprintf("#{prefix}%.2x%.2x", c1, c2)
+        end
+      end
+      return buff
+    end
+    #
+    # Converts a unicode string to standard ASCII text.
+    #
+    def self.to_ascii(str='', type = 'utf-16le', mode = '', size = '')
+      return '' if not str
+      case type
+        when 'utf-16le'
+          return str.unpack('v*').pack('C*')
+        when 'utf-16be'
+          return str.unpack('n*').pack('C*')
+        when 'utf-32le'
+          return str.unpack('V*').pack('C*')
+        when 'utf-32be'
+          return str.unpack('N*').pack('C*')
+        when 'utf-7'
+          raise TypeError, 'invalid utf type, not yet implemented'
+        when 'utf-8'
+          raise TypeError, 'invalid utf type, not yet implemented'
+        when 'uhwtfms' # suggested name from HD :P
+          raise TypeError, 'invalid utf type, not yet implemented'
+        when 'uhwtfms-half' # suggested name from HD :P
+          raise TypeError, 'invalid utf type, not yet implemented'
+        else
+          raise TypeError, 'invalid utf type'
+      end
+    end
+  end
+end