marshal-structure 1.1.1 → 2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,449 @@
1
+ class Marshal::Structure::Tokenizer
2
+
3
+ ##
4
+ # Supported major Marshal version
5
+
6
+ MAJOR_VERSION = 4
7
+
8
+ ##
9
+ # Supported minor Marshal version
10
+
11
+ MINOR_VERSION = 8
12
+
13
+ ##
14
+ # nil type prefix
15
+
16
+ TYPE_NIL = '0'
17
+
18
+ ##
19
+ # true type prefix
20
+
21
+ TYPE_TRUE = 'T'
22
+
23
+ ##
24
+ # false type prefix
25
+
26
+ TYPE_FALSE = 'F'
27
+
28
+ ##
29
+ # Fixnum type prefix
30
+
31
+ TYPE_FIXNUM = 'i'
32
+
33
+ ##
34
+ # An object that has been extended with a module
35
+
36
+ TYPE_EXTENDED = 'e'
37
+
38
+ ##
39
+ # A subclass of a built-in type
40
+
41
+ TYPE_UCLASS = 'C'
42
+
43
+ ##
44
+ # A ruby Object
45
+
46
+ TYPE_OBJECT = 'o'
47
+
48
+ ##
49
+ # A wrapped C pointer
50
+
51
+ TYPE_DATA = 'd'
52
+
53
+ ##
54
+ # An object saved with _dump
55
+
56
+ TYPE_USERDEF = 'u'
57
+
58
+ ##
59
+ # An object saved with marshal_dump
60
+
61
+ TYPE_USRMARSHAL = 'U'
62
+
63
+ ##
64
+ # A Float
65
+
66
+ TYPE_FLOAT = 'f'
67
+
68
+ ##
69
+ # A Bignum
70
+
71
+ TYPE_BIGNUM = 'l'
72
+
73
+ ##
74
+ # A String
75
+
76
+ TYPE_STRING = '"'
77
+
78
+ ##
79
+ # A Regexp
80
+
81
+ TYPE_REGEXP = '/'
82
+
83
+ ##
84
+ # An Array
85
+
86
+ TYPE_ARRAY = '['
87
+
88
+ ##
89
+ # A Hash
90
+
91
+ TYPE_HASH = '{'
92
+
93
+ ##
94
+ # A Hash with a default value (not proc)
95
+
96
+ TYPE_HASH_DEF = '}'
97
+
98
+ ##
99
+ # A Struct
100
+
101
+ TYPE_STRUCT = 'S'
102
+
103
+ ##
104
+ # An old-style Module (reference, not content)
105
+ #
106
+ # I'm not sure what makes this old. The byte stream is identical to
107
+ # TYPE_MODULE
108
+
109
+ TYPE_MODULE_OLD = 'M'
110
+
111
+ ##
112
+ # A class (reference, not content)
113
+
114
+ TYPE_CLASS = 'c'
115
+
116
+ ##
117
+ # A module (reference, not content)
118
+
119
+ TYPE_MODULE = 'm'
120
+
121
+ ##
122
+ # A Symbol
123
+
124
+ TYPE_SYMBOL = ':'
125
+
126
+ ##
127
+ # A reference to a previously-stored Symbol
128
+
129
+ TYPE_SYMLINK = ';'
130
+
131
+ ##
132
+ # Instance variables for a following object
133
+
134
+ TYPE_IVAR = 'I'
135
+
136
+ ##
137
+ # A reference to a previously-stored Object
138
+
139
+ TYPE_LINK = '@'
140
+
141
+ TYPE_MAP = Hash.new do |_, type| # :nodoc:
142
+ raise Marshal::Structure::Error, "unknown type #{type.inspect}"
143
+ end
144
+
145
+ TYPE_MAP[TYPE_ARRAY] = :array
146
+ TYPE_MAP[TYPE_BIGNUM] = :bignum
147
+ TYPE_MAP[TYPE_CLASS] = :class
148
+ TYPE_MAP[TYPE_DATA] = :data
149
+ TYPE_MAP[TYPE_EXTENDED] = :extended
150
+ TYPE_MAP[TYPE_FALSE] = :false
151
+ TYPE_MAP[TYPE_FIXNUM] = :fixnum
152
+ TYPE_MAP[TYPE_FLOAT] = :float
153
+ TYPE_MAP[TYPE_HASH] = :hash
154
+ TYPE_MAP[TYPE_HASH_DEF] = :hash_default
155
+ TYPE_MAP[TYPE_IVAR] = :instance_variables
156
+ TYPE_MAP[TYPE_LINK] = :link
157
+ TYPE_MAP[TYPE_MODULE] = :module
158
+ TYPE_MAP[TYPE_MODULE_OLD] = :module_old
159
+ TYPE_MAP[TYPE_NIL] = :nil
160
+ TYPE_MAP[TYPE_OBJECT] = :object
161
+ TYPE_MAP[TYPE_REGEXP] = :regexp
162
+ TYPE_MAP[TYPE_STRING] = :string
163
+ TYPE_MAP[TYPE_STRUCT] = :struct
164
+ TYPE_MAP[TYPE_SYMBOL] = :symbol
165
+ TYPE_MAP[TYPE_SYMLINK] = :symbol_link
166
+ TYPE_MAP[TYPE_TRUE] = :true
167
+ TYPE_MAP[TYPE_UCLASS] = :user_class
168
+ TYPE_MAP[TYPE_USERDEF] = :user_defined
169
+ TYPE_MAP[TYPE_USRMARSHAL] = :user_marshal
170
+
171
+ ##
172
+ # Creates a Tokenizer for a Marshal stream in String +stream+.
173
+
174
+ def initialize stream
175
+ @byte_array = stream.bytes.to_a
176
+ @consumed = 2
177
+ @state = [:any]
178
+ @stream = stream
179
+ @stream.force_encoding Encoding::BINARY
180
+ end
181
+
182
+ ##
183
+ # Consumes one byte from the marshal stream
184
+
185
+ def byte
186
+ raise Marshal::Structure::EndOfMarshal.new(@consumed, 1) if
187
+ @consumed >= @byte_array.size
188
+
189
+ data = @byte_array[@consumed]
190
+ @consumed += 1
191
+
192
+ data
193
+ end
194
+
195
+ ##
196
+ # Consumes +count+ bytes from the marshal stream as an Array of bytes
197
+
198
+ def byte_array count
199
+ bytes(count).bytes.to_a
200
+ end
201
+
202
+ ##
203
+ # Consumes a sequence of bytes from the marshal stream based on the next
204
+ # integer
205
+
206
+ def byte_sequence
207
+ size = long
208
+ bytes size
209
+ end
210
+
211
+ ##
212
+ # Consumes +count+ from the marshal stream
213
+
214
+ def bytes count
215
+ raise Marshal::Structure::EndOfMarshal.new(@consumed, count) if
216
+ @consumed + count > @stream.size
217
+
218
+ data = @stream[@consumed, count]
219
+ @consumed += count
220
+ data
221
+ end
222
+
223
+ ##
224
+ # Consumes one byte from the marshal stream and returns a character
225
+
226
+ def character
227
+ byte.chr
228
+ end
229
+
230
+ ##
231
+ # Checks if the stream starts with a compatible marshal version
232
+
233
+ def check_version
234
+ major = @stream[0].ord
235
+ minor = @stream[1].ord
236
+
237
+ return if major == MAJOR_VERSION and minor <= MINOR_VERSION
238
+
239
+ raise TypeError, "incompatible marshal file format (can't be read)\n\tformat version #{MAJOR_VERSION}.#{MINOR_VERSION} required; #{major}.#{minor} given"
240
+ end
241
+
242
+ ##
243
+ # Decodes a stored C long
244
+
245
+ def long
246
+ c = byte
247
+
248
+ return 0 if c == 0
249
+
250
+ # convert to signed integer
251
+ c = (c ^ 0x80) - 0x80
252
+
253
+ if c > 0 then
254
+ return c - 5 if 4 < c
255
+
256
+ x = 0
257
+
258
+ c.times do |i|
259
+ x |= byte << (8 * i)
260
+ end
261
+
262
+ x
263
+ else
264
+ return c + 5 if c < -4
265
+
266
+ x = -1
267
+
268
+ (-c).times do |i|
269
+ factor = 8 * i
270
+ x &= ~(0xff << factor)
271
+ x |= byte << factor
272
+ end
273
+
274
+ x
275
+ end
276
+ end
277
+
278
+ ##
279
+ # Attempts to retrieve the next token from the stream. You may need to call
280
+ # next_token twice to receive a token as the current token may be
281
+ # incomplete.
282
+
283
+ def next_token # :nodoc:
284
+ current_state = @state.pop
285
+
286
+ case current_state
287
+ when :any then tokenize_any
288
+ when :array then tokenize_array
289
+ when :bignum then tokenize_bignum
290
+ when :byte then byte
291
+ when :bytes,
292
+ :class, :module, :module_old,
293
+ :float, :string, :symbol then byte_sequence
294
+ when :data then tokenize_data
295
+ when :extended then tokenize_extended
296
+ when :fixnum, :link, :symbol_link then long
297
+ when :hash, :pairs then tokenize_pairs
298
+ when :hash_default then tokenize_hash_default
299
+ when :instance_variables then tokenize_instance_variables
300
+ when :object then tokenize_object
301
+ when :regexp then tokenize_regexp
302
+ when :struct then tokenize_struct
303
+ when :sym then tokenize_sym
304
+ when :user_class then tokenize_user_class
305
+ when :user_defined then tokenize_user_defined
306
+ when :user_marshal then tokenize_user_marshal
307
+ else
308
+ raise Marshal::Structure::Error,
309
+ "bug: unknown state #{current_state.inspect}"
310
+ end
311
+ end
312
+
313
+ ##
314
+ # Returns an Enumerator that will tokenize the Marshal stream.
315
+
316
+ def tokens
317
+ check_version
318
+
319
+ Enumerator.new do |yielder|
320
+ until @state.empty? do
321
+ token = next_token
322
+
323
+ yielder << token if token
324
+ end
325
+ end
326
+ end
327
+
328
+ def tokenize_any # :nodoc:
329
+ item_type = TYPE_MAP[character]
330
+
331
+ @state.push item_type unless [:nil, :true, :false].include? item_type
332
+
333
+ item_type
334
+ end
335
+
336
+ def tokenize_array # :nodoc:
337
+ size = long
338
+
339
+ @state.concat Array.new(size, :any)
340
+
341
+ size
342
+ end
343
+
344
+ def tokenize_bignum # :nodoc:
345
+ sign = byte == 45 ? -1 : 1
346
+ size = long * 2
347
+
348
+ result = 0
349
+
350
+ bytes = byte_array size
351
+
352
+ bytes.each_with_index do |byte, exp|
353
+ result += (byte * 2**(exp*8))
354
+ end
355
+
356
+ sign * result
357
+ end
358
+
359
+ def tokenize_data # :nodoc:
360
+ @state.push :any
361
+ @state.push :sym
362
+
363
+ next_token
364
+ end
365
+
366
+ alias tokenize_extended tokenize_data # :nodoc:
367
+
368
+ def tokenize_hash_default # :nodoc:
369
+ size = long
370
+
371
+ @state.push :any
372
+ @state.push size * 2 if size > 0
373
+
374
+ size
375
+ end
376
+
377
+ def tokenize_instance_variables # :nodoc:
378
+ @state.push :pairs
379
+ @state.push :any
380
+
381
+ next_token
382
+ end
383
+
384
+ ##
385
+ # For multipart objects like arrays and hashes a count of items is pushed
386
+ # onto the stack. This method re-pushes an :any onto the stack until the
387
+ # correct number of tokens have been created from the stream.
388
+
389
+ def tokenize_next_any current_state # :nodoc:
390
+ next_state = current_state - 1
391
+ @state.push next_state if current_state > 0
392
+ @state.push :any
393
+
394
+ next_token
395
+ end
396
+
397
+ def tokenize_object # :nodoc:
398
+ @state.push :pairs
399
+ @state.push :sym
400
+
401
+ next_token
402
+ end
403
+
404
+ def tokenize_pairs # :nodoc:
405
+ size = long
406
+
407
+ @state.concat Array.new(size * 2, :any)
408
+
409
+ size
410
+ end
411
+
412
+ def tokenize_regexp # :nodoc:
413
+ @state.push :byte
414
+
415
+ byte_sequence
416
+ end
417
+
418
+ def tokenize_struct # :nodoc:
419
+ @state.push :pairs
420
+ @state.push :sym
421
+
422
+ next_token
423
+ end
424
+
425
+ def tokenize_sym # :nodoc:
426
+ item_type = TYPE_MAP[character]
427
+
428
+ raise Marshal::Structure::Error,
429
+ "expected symbol type, got #{item_type.inspect}" unless
430
+ [:symbol, :symbol_link].include? item_type
431
+
432
+ @state.push item_type
433
+
434
+ item_type
435
+ end
436
+
437
+ alias tokenize_user_class tokenize_data # :nodoc:
438
+
439
+ def tokenize_user_defined # :nodoc:
440
+ @state.push :bytes
441
+ @state.push :sym
442
+
443
+ next_token
444
+ end
445
+
446
+ alias tokenize_user_marshal tokenize_data # :nodoc:
447
+
448
+ end
449
+
@@ -1,170 +1,36 @@
1
- require 'minitest/autorun'
2
- require 'marshal/structure'
3
- require 'ben_string'
4
- require 'openssl'
5
- require 'pp'
1
+ require 'marshal/structure/test_case'
6
2
 
7
- class OpenSSL::X509::Name
8
- alias _dump_data to_a
3
+ class TestMarshalStructure < Marshal::Structure::TestCase
9
4
 
10
- def _load_data ary
11
- ary.each do |entry|
12
- add_entry(*entry)
5
+ def test_class_load
6
+ ary = %W[\x04 \x08 T]
7
+ def ary.getc
8
+ shift
13
9
  end
14
- end
15
- end
16
-
17
- class B; end
18
-
19
- module C; end
20
-
21
- module E; end
22
-
23
- class M
24
- def marshal_dump
25
- 'marshal_dump'
26
- end
27
-
28
- def marshal_load o
29
- end
30
- end
31
-
32
- class U
33
- def self._load str
34
- new
35
- end
36
-
37
- def _dump limit
38
- s = '_dump'
39
- s.instance_variable_set :@ivar_on_dump_str, 'value on ivar on dump str'
40
- s
41
- end
42
- end
43
-
44
- S = Struct.new :f
45
-
46
- class TestMarshalStructure < MiniTest::Unit::TestCase
47
10
 
48
- def mu_pp obj
49
- s = ''
50
- s = PP.pp obj, s
51
- s.chomp
52
- end
11
+ result = @MS.load ary
53
12
 
54
- def setup
55
- @MS = Marshal::Structure
13
+ assert_equal :true, result
56
14
  end
57
15
 
58
- def test_construct
59
- str =
60
- "\004\b{\006:\006a[\031c\006Bm\006C\"\006d/\006e\000i\006" \
61
- "f\0322.2999999999999998\000ff" \
62
- "l+\n\000\000\000\000\000\000\000\000\001\0000TF}\000i\000" \
63
- "S:\006S\006:\006fi\000o:\vObject\000@\017" \
64
- "U:\006M\"\021marshal_dump" \
65
- "Iu:\006U\n_dump\006" \
66
- ":\026@ivar_on_dump_str\"\036value on ivar on dump str" \
67
- ";\000e:\006Eo;\b\000" \
68
- "I\"\025string with ivar\006:\v@value\"\017some value" \
69
- "C:\016BenString\"\000"
70
-
71
- structure = @MS.load str
72
-
73
- expected = [
74
- :hash,
75
- 0,
76
- 1,
77
- [:symbol, 0, "a"],
78
- [:array,
79
- 1,
80
- 20,
81
- [:class, 2, "B"],
82
- [:module, 3, "C"],
83
- [:string, 4, "d"],
84
- [:regexp, 5, "e", 0],
85
- [:fixnum, 1],
86
- [:float, 6, "2.2999999999999998\000ff"],
87
- [:bignum, 7, 1, 10, 18446744073709551616],
88
- :nil,
89
- :true,
90
- :false,
91
- [:hash_default, 8, 0, [:fixnum, 0]],
92
- [:struct, 9, [:symbol, 1, "S"], 1, [:symbol, 2, "f"], [:fixnum, 0]],
93
- [:object, 10, [:symbol, 3, "Object"], [0]],
94
- [:link, 10],
95
- [:user_marshal, 11, [:symbol, 4, "M"], [:string, 12, "marshal_dump"]],
96
- [:instance_variables,
97
- [:user_defined, 13, [:symbol, 5, "U"], "_dump"],
98
- 1,
99
- [:symbol, 6, "@ivar_on_dump_str"],
100
- [:string, 14, "value on ivar on dump str"]],
101
- [:symbol_link, 0],
102
- [:extended, [:symbol, 7, "E"], [:object, 15, [:symbol_link, 3], [0]]],
103
- [:instance_variables,
104
- [:string, 16, "string with ivar"],
105
- 1,
106
- [:symbol, 8, "@value"],
107
- [:string, 17, "some value"]],
108
- [:user_class, [:symbol, 9, "BenString"], [:string, 18, ""]]]]
109
-
110
- assert_equal expected, structure
16
+ def test_count_allocations
17
+ assert_equal 1, @MS.new("\x04\x08[\x06T").count_allocations
111
18
  end
112
19
 
113
- def test_construct_data
114
- name = OpenSSL::X509::Name.parse 'CN=nobody/DC=example'
115
- str = Marshal.dump name
116
-
117
- expected = [
118
- :data,
119
- 0,
120
- [:symbol, 0, "OpenSSL::X509::Name"],
121
- [:array,
122
- 1,
123
- 2,
124
- [:array, 2, 3,
125
- [:string, 3, "CN"],
126
- [:string, 4, "nobody"],
127
- [:fixnum, 12]],
128
- [:array, 5, 3,
129
- [:string, 6, "DC"],
130
- [:string, 7, "example"],
131
- [:fixnum, 22]]]]
132
-
133
- assert_equal expected, @MS.load(str)
134
- end
135
-
136
- def test_construct_module_old
137
- assert_equal [:module, 0, "M"], @MS.load("\x04\x08M\x06M")
20
+ def test_load
21
+ assert_equal [true], @MS.new("\x04\x08[\x06T").load
138
22
  end
139
23
 
140
- def test_consume
141
- ms = @MS.new "\x04\x08\x06M"
142
-
143
- assert_equal "\x06M", ms.consume(2)
24
+ def test_structure
25
+ assert_equal [:array, 0, 1, :true], @MS.new("\x04\x08[\x06T").structure
144
26
  end
145
27
 
146
- def test_consume_bytes
147
- ms = @MS.new "\x04\x08\x06M"
148
-
149
- assert_equal [6, 77], ms.consume_bytes(2)
150
- end
151
-
152
- def test_consume_byte
153
- ms = @MS.new "\x04\x08M"
154
-
155
- assert_equal 77, ms.consume_byte
156
- end
157
-
158
- def test_consume_character
159
- ms = @MS.new "\x04\x08M"
160
-
161
- assert_equal 'M', ms.consume_character
162
- end
28
+ def test_token_stream
29
+ stream = @MS.new("\x04\x08[\x06T").token_stream
163
30
 
164
- def test_get_byte_sequence
165
- ms = @MS.new "\x04\x08\x06M"
31
+ assert_kind_of Enumerator, stream
166
32
 
167
- assert_equal "M", ms.get_byte_sequence
33
+ assert_equal [:array, 1, :true], stream.to_a
168
34
  end
169
35
 
170
36
  end