rex-text 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ # -*- coding: binary -*-
2
+ module Rex
3
+ module Text
4
+ # We are re-opening the module to add these module methods.
5
+ # Breaking them up this way allows us to maintain a little higher
6
+ # degree of organisation and make it easier to find what you're looking for
7
+ # without changing the underlying calls that we historically rely upon.
8
+
#
# Converts a string to random case
#
# Every character is upper- or lower-cased independently, based on one
# draw from Kernel#rand per character. The input string is left untouched.
#
# @example
#   Rex::Text.to_rand_case("asdf") # => "asDf"
#
# @param str [String] The string to randomize
# @return [String] A copy of +str+ with randomized letter case
# @see permute_case
# @see to_mixed_case_array
def self.to_rand_case(str)
  buf = str.dup
  # Exclusive upper bound: index str.length is an empty slice, so visiting
  # it would only waste a random draw.
  str.length.times do |i|
    chr = str[i, 1]
    buf[i, 1] = rand(2) == 0 ? chr.upcase : chr.downcase
  end
  buf
end
26
+
#
# Takes a string, and returns an array of all mixed case versions.
#
# Variants are ordered as a binary counter over the characters would
# produce them: the first character changes slowest, and lower case comes
# before upper case. Characters without a distinct upper/lower form
# (digits, punctuation, newlines) contribute a single choice, so no
# duplicate candidates are generated for them.
#
# @example
#   >> Rex::Text.to_mixed_case_array "abc1"
#   => ["abc1", "abC1", "aBc1", "aBC1", "Abc1", "AbC1", "ABc1", "ABC1"]
#
# @param str [String] The string to randomize
# @return [Array<String>] All case variants; +[""]+ for an empty string
# @see permute_case
def self.to_mixed_case_array(str)
  variants = str.each_char.inject(['']) do |prefixes, chr|
    choices = [chr.downcase, chr.upcase].uniq
    prefixes.flat_map { |prefix| choices.map { |choice| prefix + choice } }
  end
  variants.uniq
end
51
+
#
# Randomize the whitespace in a string
#
# Each run of whitespace is replaced by a run of 2 to 51 characters drawn
# at random from tab, space, carriage return and line feed.
#
# @param str [String] The string whose whitespace runs are replaced
# @return [String] A new string; +str+ itself is not modified
def self.randomize_space(str)
  whitespace = ["\x09", "\x20", "\x0d", "\x0a"]
  str.gsub(/\s+/) do
    run_length = rand(50) + 2
    Array.new(run_length) { whitespace.sample }.join
  end
end
67
+
#
# Shuffles a byte stream
#
# The string is split into its byte values, handed to shuffle_a, and the
# reordered bytes are packed back into a string.
#
# @param str [String]
# @return [String] The shuffled result
# @see shuffle_a
def self.shuffle_s(str)
  byte_values = str.unpack("C*")
  shuffled = shuffle_a(byte_values)
  shuffled.pack("C*")
end
77
+
#
# Performs a Fisher-Yates shuffle on an array
#
# Modifies +arr+ in place
#
# Walking the indexes in ascending order, each element is swapped with one
# at a randomly chosen index no greater than its own.
#
# @param arr [Array] The array to be shuffled
# @return [Array] The same +arr+ object, now shuffled
def self.shuffle_a(arr)
  arr.each_index do |pos|
    pick = rand(pos + 1)
    next if pick == pos
    arr[pos], arr[pick] = arr[pick], arr[pos]
  end
  arr
end
99
+
# Permute the case of a word
#
# Returns every variant of +word+ in which each character from position
# +idx+ onwards that appears in UpperAlpha + LowerAlpha is upper- or
# lower-cased. The upper-case choice is listed before the lower-case one,
# and characters before +idx+ or outside that alphabet are left as they are.
#
# @param word [String] The word to permute
# @param idx [Integer] Position at which permutation starts
# @return [Array<String>] All case permutations
def self.permute_case(word, idx=0)
  # Past the last character there is nothing left to vary.
  return [word] if idx == word.length

  current = word[idx, 1]
  # Characters outside the alphabet are skipped over unchanged.
  return permute_case(word, idx + 1).flatten unless (UpperAlpha + LowerAlpha).index(current)

  [current.upcase, current.downcase].map { |replacement|
    variant = word.dup
    variant[idx, 1] = replacement
    permute_case(variant, idx + 1)
  }.flatten
end
124
+ end
125
+ end
@@ -0,0 +1,59 @@
1
+ # -*- coding: binary -*-
2
+ module Rex
3
+ module Text
4
+ # We are re-opening the module to add these module methods.
5
+ # Breaking them up this way allows us to maintain a little higher
6
+ # degree of organisation and make it easier to find what you're looking for
7
+ # without changing the underlying calls that we historically rely upon.
8
+
#
# Converts a string to one similar to what would be used by cowsay(1), a UNIX utility for
# displaying text as if it was coming from an ASCII-cow's mouth:
#
#    __________________
#   < the cow says moo >
#    ------------------
#          \   ^__^
#           \  (oo)\_______
#              (__)\       )\/\
#                  ||----w |
#                  ||     ||
#
# The text is cut into fixed-size chunks (words are split if necessary) so
# that no cloud line, including its two-character borders, exceeds +width+.
# Newlines in +text+ act as chunk boundaries and are not printed. An empty
# +text+ yields an empty cloud.
#
# @param text [String] The string to cowsay
# @param width [Fixnum] Width of the cow's cloud. Defaults to cowsay(1)'s default, 39.
#   Values too small to leave room for text are treated as one character per line.
# @return [String] The cloud followed by the cow, ending in a newline
def self.cowsay(text, width=39)
  # cowsay(1) chunks a message up and wraps each chunk in '| ' and ' |'.
  # Rex::Text.wordwrap almost does this, but won't split an over-long word,
  # which results in oddly formed banners, so the chunking is done by hand.
  chunk_size = [width - 4, 1].max
  text_lines = text.scan(/.{1,#{chunk_size}}/)
  # Nothing printable (empty or newline-only text): draw an empty cloud.
  text_lines = [''] if text_lines.empty?
  max_length = text_lines.map(&:size).max

  cloud_parts = []
  cloud_parts << " #{'_' * (max_length + 2)}"
  if text_lines.size == 1
    # Use the scanned chunk rather than the raw text so a stray newline
    # cannot break the single-line cloud.
    cloud_parts << "< #{text_lines.first} >"
  else
    # Rounded corners: '/' and '\' on the first and last line, '|' between.
    cloud_parts << "/ #{text_lines.first.ljust(max_length, ' ')} \\"
    text_lines[1...-1].each do |line|
      cloud_parts << "| #{line.ljust(max_length, ' ')} |"
    end
    cloud_parts << "\\ #{text_lines.last.ljust(max_length, ' ')} /"
  end
  cloud_parts << " #{'-' * (max_length + 2)}"
  # NOTE(review): the leading whitespace of the cow art below follows the
  # conventional cowsay alignment; confirm against the released file.
  cloud_parts << <<EOS
       \\   ,__,
        \\  (oo)____
           (__)    )\\
              ||--|| *
EOS
  cloud_parts.join("\n")
end
57
+
58
+ end
59
+ end
@@ -0,0 +1,276 @@
1
+ # -*- coding: binary -*-
2
+ module Rex
3
+ module Text
4
+ # We are re-opening the module to add these module methods.
5
+ # Breaking them up this way allows us to maintain a little higher
6
+ # degree of organisation and make it easier to find what you're looking for
7
+ # without changing the underlying calls that we historically rely upon.
8
+
#
# Converts standard ASCII text to a unicode string.
#
# Supported types are: 'utf-16le', 'utf-16be', 'utf-32le', 'utf-32be',
# 'utf-7', 'utf-8', 'uhwtfms' and 'uhwtfms-half'.
#
# Providing 'mode' provides hints to the actual encoder as to how it
# should encode the string. UTF-7 and UTF-8 use "mode" as an encoding
# style; the two 'uhwtfms' types overload it as a codepage number
# (default 1252).
#
# utf-7 by default does not encode alphanumeric and a few other
# characters. By specifying the mode of "all", then all of the
# characters are encoded, not just the non-alphanumeric set.
#   to_unicode(str, 'utf-7', 'all')
#
# utf-8 specifies that alphanumeric characters are used directly, eg
# "a" is just "a". However, there exist 6 different overlong
# encodings of "a" that are technically not valid, but parse just fine
# in most utf-8 parsers. (0xC1A1, 0xE081A1, 0xF08081A1, 0xF8808081A1,
# 0xFC80808081A1, 0xFE8080808081A1). How many bytes to use for the
# overlong encoding is specified providing 'size'.
#   to_unicode(str, 'utf-8', 'overlong', 2)
#
# Many utf-8 parsers also allow invalid overlong encodings, where bits
# that are unused when encoding a single byte are modified. Many
# parsers will ignore these bits, rendering simple string matching to
# be ineffective for dealing with UTF-8 strings. There are many more
# invalid overlong encodings possible for "a". For example, three
# encodings are available for an invalid 2 byte encoding of "a".
# (0xC1E1 0xC161 0xC121).
#
# By specifying "invalid", a random invalid encoding is chosen for the
# given byte size.
#   to_unicode(str, 'utf-8', 'invalid', 2)
#
# utf-7 defaults to 'normal' utf-7 encoding; utf-8 defaults to 2 byte
# 'normal' encoding.
#
# @param str [String] The text to convert, processed byte by byte
# @param type [String] One of the supported types listed above
# @param mode [String] Encoder hint (utf-7 / utf-8) or codepage (uhwtfms)
# @param size [Integer, String] Bytes per encoded character for utf-8
#   (2..7); the empty string selects the default of 2
# @return [String] The encoded string, or '' when +str+ is nil/false
# @raise [TypeError] on an unknown type, utf-8 mode, utf-8 size or codepage
def self.to_unicode(str='', type = 'utf-16le', mode = '', size = '')
  return '' unless str
  case type
  when 'utf-16le'
    return str.unpack('C*').pack('v*')
  when 'utf-16be'
    return str.unpack('C*').pack('n*')
  when 'utf-32le'
    return str.unpack('C*').pack('V*')
  when 'utf-32be'
    return str.unpack('C*').pack('N*')
  when 'utf-7'
    case mode
    when 'all'
      # Every matched character, '+' included, is base64-encoded; there is
      # no "+-" shortcut in this mode. `.` does not match "\n", so newlines
      # pass through untouched.
      return str.gsub(/./) { |a|
        '+' + encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '') + '-'
      }
    else
      return str.gsub(/[^\n\r\t\ A-Za-z0-9\'\(\),-.\/\:\?]/) { |a|
        out = ''
        # A literal '+' is written as the empty shift sequence "+-".
        if a != '+'
          out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
        end
        '+' + out + '-'
      }
    end
  when 'utf-8'
    size = 2 if size == ''

    unless size >= 2 && size <= 7
      raise TypeError, 'invalid utf-8 size'
    end

    string = ''
    str.each_byte { |a|
      # NOTE(review): 21 is decimal here; 0x20 or 0x21 may have been
      # intended - confirm before changing.
      if (a < 21 || a > 0x7f) || mode != ''
        # The byte as an array of eight 0/1 integers, most significant first.
        bin = [a].pack('C').unpack('B8')[0].split(//).map { |digit| digit.to_i }

        out = Array.new(8 * size, 0)

        # Lead byte gets +size+ leading one bits; every byte gets its top
        # bit set (continuation bytes therefore start with binary 10).
        0.upto(size - 1) { |i|
          out[i] = 1
          out[i * 8] = 1
        }

        # Spread the payload bits, least significant first, into the low
        # six bits of each output byte, working from the last byte back.
        i = 0
        byte = 0
        bin.reverse.each { |payload_bit|
          if i < 6
            mod = (((size * 8) - 1) - byte * 8) - i
            out[mod] = payload_bit
          else
            # This output byte is full: move to the previous one and
            # process the same payload bit again.
            byte = byte + 1
            i = 0
            redo
          end
          i = i + 1
        }

        case mode
        when '', 'overlong'
          # nothing more to do, the layout above already covers these
        when 'invalid'
          # Randomly rewrite bits that decoders commonly ignore until at
          # least one of them differs from the plain encoding.
          changed = false
          until changed
            [7, 8, 15, 16, 23, 24, 31, 32, 41].each { |offset|
              pos = (size * 8) - offset
              if pos > 1
                set = rand(2)
                if out[pos] != set
                  out[pos] = set
                  changed = true
                end
              end
            }
          end
        else
          raise TypeError, 'Invalid mode. Only "overlong" and "invalid" are acceptable modes for utf-8'
        end
        string << [out.join('')].pack('B*')
      else
        string << [a].pack('C')
      end
    }
    return string
  when 'uhwtfms' # suggested name from HD :P
    load_codepage()

    string = ''
    # overloading mode as codepage
    if mode == ''
      mode = 1252 # ANSI - Latin 1, default for US installs of MS products
    else
      mode = mode.to_i
    end
    if @@codepage_map_cache[mode].nil?
      raise TypeError, "Invalid codepage #{mode}"
    end
    str.each_byte { |byte|
      char = [byte].pack('C*')
      possible = @@codepage_map_cache[mode]['data'][char]
      if possible.nil?
        raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
      end
      # Pick one of the candidate encodings at random.
      string << possible[ rand(possible.length) ]
    }
    return string
  when 'uhwtfms-half' # suggested name from HD :P
    load_codepage()
    string = ''
    # overloading mode as codepage
    if mode == ''
      mode = 1252 # ANSI - Latin 1, default for US installs of MS products
    else
      mode = mode.to_i
    end
    if mode != 1252
      raise TypeError, "Invalid codepage #{mode}, only 1252 supported for uhwtfms_half"
    end
    str.each_byte { |byte|
      if ((byte >= 33 && byte <= 63) || (byte >= 96 && byte <= 126))
        string << "\xFF" + [byte ^ 32].pack('C')
      elsif (byte >= 64 && byte <= 95)
        string << "\xFF" + [byte ^ 96].pack('C')
      else
        # Everything else goes through the codepage table.
        char = [byte].pack('C')
        possible = @@codepage_map_cache[mode]['data'][char]
        if possible.nil?
          raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
        end
        string << possible[ rand(possible.length) ]
      end
    }
    return string
  else
    raise TypeError, 'invalid utf type'
  end
end
198
+
#
# A custom unicode filter for dealing with multi-byte strings on a 8-bit console
# Punycode would have been more "standard", but it requires valid Unicode chars
#
# If the string form of +str+ contains any byte listed in LowAscii,
# HighAscii or 0x7f, the result is "$U$", then the bytes between 0x20 and
# 0x7e except '-', then "-0x" and the hex dump of the whole input.
# Otherwise +str+ is returned as given.
#
# @param str [String, #to_s] The value to filter
# @return [String, Object] The encoded string, or +str+ itself when no
#   filtering is needed
# @see unicode_filter_decode
def self.unicode_filter_encode(str)
  # Coerce once so the check and the encoding see the same bytes, even
  # when +str+ is not a String.
  raw = str.to_s
  bytes = raw.unpack("C*")
  flagged = (LowAscii + HighAscii + "\x7f").unpack("C*")
  return str if (bytes & flagged).empty?

  # '-' (0x2d) is left out of the readable part because it separates that
  # part from the hex dump.
  readable = bytes.select { |c| c < 0x7f && c > 0x1f && c != 0x2d }.pack("C*")
  "$U$" + readable + "-0x" + raw.unpack("H*")[0]
end
210
+
#
# Reverses unicode_filter_encode: every "$U$<text>-0x<hex>" sequence in the
# string form of +str+ is replaced by the bytes spelled out in <hex>.
#
# @param str [String, #to_s] Text that may contain filtered sequences
# @return [String] The text with all filtered sequences expanded
# @see unicode_filter_encode
def self.unicode_filter_decode(str)
  filtered = /\$U\$([\x20-\x2c\x2e-\x7E]*)\-0x([A-Fa-f0-9]+)/n
  str.to_s.gsub(filtered) do
    hex_dump = Regexp.last_match(2)
    [hex_dump].pack("H*")
  end
end
214
+
# Converts a string from its current encoding to UTF-8, skipping over any
# characters which don't convert cleanly. This is a convenience method
# that wraps String#encode with non-raising default parameters.
#
# The options are passed as keyword arguments, so a braced Hash is never
# mistaken for the positional source-encoding argument of String#encode.
#
# @param str [String] An encodable string
# @return [String] a UTF-8 equivalent
# @note This method will discard invalid characters
def self.to_utf8(str)
  str.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '')
end
225
+
#
# Returns a unicode escaped string for Javascript
#
# The input is consumed two bytes at a time and each pair is written as
# +prefix+ followed by four lowercase hex digits. If the input has an odd
# number of bytes, a trailing 0x41 ("A") is used as padding. The caller's
# string is not modified.
#
# @param data [String] The raw bytes to escape
# @param endian [Object] ENDIAN_LITTLE emits the second byte of each pair
#   first; any other value keeps the bytes in input order
# @param prefix [String] Text placed before each pair. It is embedded in a
#   sprintf format string, so a literal percent sign must be written "%%".
# @return [String] The escaped string
def self.to_unescape(data, endian=ENDIAN_LITTLE, prefix='%%u')
  # Work on byte values so the input is neither mutated nor indexed by
  # (possibly multi-byte) characters.
  bytes = data.unpack("C*")
  bytes << 0x41 if bytes.length.odd?

  bytes.each_slice(2).map { |c1, c2|
    if endian == ENDIAN_LITTLE
      sprintf("#{prefix}%.2x%.2x", c2, c1)
    else
      sprintf("#{prefix}%.2x%.2x", c1, c2)
    end
  }.join
end
247
+
#
# Converts a unicode string to standard ASCII text.
#
# Only the fixed-width types are implemented: each 16- or 32-bit unit is
# read in the requested byte order and written back as a single byte.
#
# @param str [String] The encoded text
# @param type [String] 'utf-16le', 'utf-16be', 'utf-32le' or 'utf-32be'
# @param mode [String] Unused by the implemented types
# @param size [String] Unused by the implemented types
# @return [String] The decoded text, or '' when +str+ is nil/false
# @raise [TypeError] for types that are unknown or not yet implemented
def self.to_ascii(str='', type = 'utf-16le', mode = '', size = '')
  return '' unless str

  unit_format = {
    'utf-16le' => 'v*',
    'utf-16be' => 'n*',
    'utf-32le' => 'V*',
    'utf-32be' => 'N*'
  }[type]
  return str.unpack(unit_format).pack('C*') if unit_format

  if ['utf-7', 'utf-8', 'uhwtfms', 'uhwtfms-half'].include?(type)
    raise TypeError, 'invalid utf type, not yet implemented'
  end

  raise TypeError, 'invalid utf type'
end
274
+
275
+ end
276
+ end