marshal-structure 1.1.1 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,449 @@
1
##
# Splits a Marshal stream held in a String into a flat sequence of tokens:
# type symbols (see TYPE_MAP) interleaved with the sizes, integers and byte
# strings that follow each type prefix.
#
# The tokenizer keeps a stack of pending states in <tt>@state</tt>; each call
# to #next_token pops one state and may push the states needed to read the
# rest of a multi-part item.

class Marshal::Structure::Tokenizer

  ##
  # Supported major Marshal version

  MAJOR_VERSION = 4

  ##
  # Supported minor Marshal version

  MINOR_VERSION = 8

  ##
  # nil type prefix

  TYPE_NIL = '0'

  ##
  # true type prefix

  TYPE_TRUE = 'T'

  ##
  # false type prefix

  TYPE_FALSE = 'F'

  ##
  # Fixnum type prefix

  TYPE_FIXNUM = 'i'

  ##
  # An object that has been extended with a module

  TYPE_EXTENDED = 'e'

  ##
  # A subclass of a built-in type

  TYPE_UCLASS = 'C'

  ##
  # A ruby Object

  TYPE_OBJECT = 'o'

  ##
  # A wrapped C pointer

  TYPE_DATA = 'd'

  ##
  # An object saved with _dump

  TYPE_USERDEF = 'u'

  ##
  # An object saved with marshal_dump

  TYPE_USRMARSHAL = 'U'

  ##
  # A Float

  TYPE_FLOAT = 'f'

  ##
  # A Bignum

  TYPE_BIGNUM = 'l'

  ##
  # A String

  TYPE_STRING = '"'

  ##
  # A Regexp

  TYPE_REGEXP = '/'

  ##
  # An Array

  TYPE_ARRAY = '['

  ##
  # A Hash

  TYPE_HASH = '{'

  ##
  # A Hash with a default value (not proc)

  TYPE_HASH_DEF = '}'

  ##
  # A Struct

  TYPE_STRUCT = 'S'

  ##
  # An old-style Module (reference, not content)
  #
  # I'm not sure what makes this old.  The byte stream is identical to
  # TYPE_MODULE

  TYPE_MODULE_OLD = 'M'

  ##
  # A class (reference, not content)

  TYPE_CLASS = 'c'

  ##
  # A module (reference, not content)

  TYPE_MODULE = 'm'

  ##
  # A Symbol

  TYPE_SYMBOL = ':'

  ##
  # A reference to a previously-stored Symbol

  TYPE_SYMLINK = ';'

  ##
  # Instance variables for a following object

  TYPE_IVAR = 'I'

  ##
  # A reference to a previously-stored Object

  TYPE_LINK = '@'

  # Maps a one-character type prefix to the token symbol emitted for it.
  # Looking up a prefix that was never registered raises instead of
  # returning nil.
  TYPE_MAP = Hash.new do |_, type| # :nodoc:
    raise Marshal::Structure::Error, "unknown type #{type.inspect}"
  end

  TYPE_MAP[TYPE_ARRAY]      = :array
  TYPE_MAP[TYPE_BIGNUM]     = :bignum
  TYPE_MAP[TYPE_CLASS]      = :class
  TYPE_MAP[TYPE_DATA]       = :data
  TYPE_MAP[TYPE_EXTENDED]   = :extended
  TYPE_MAP[TYPE_FALSE]      = :false
  TYPE_MAP[TYPE_FIXNUM]     = :fixnum
  TYPE_MAP[TYPE_FLOAT]      = :float
  TYPE_MAP[TYPE_HASH]       = :hash
  TYPE_MAP[TYPE_HASH_DEF]   = :hash_default
  TYPE_MAP[TYPE_IVAR]       = :instance_variables
  TYPE_MAP[TYPE_LINK]       = :link
  TYPE_MAP[TYPE_MODULE]     = :module
  TYPE_MAP[TYPE_MODULE_OLD] = :module_old
  TYPE_MAP[TYPE_NIL]        = :nil
  TYPE_MAP[TYPE_OBJECT]     = :object
  TYPE_MAP[TYPE_REGEXP]     = :regexp
  TYPE_MAP[TYPE_STRING]     = :string
  TYPE_MAP[TYPE_STRUCT]     = :struct
  TYPE_MAP[TYPE_SYMBOL]     = :symbol
  TYPE_MAP[TYPE_SYMLINK]    = :symbol_link
  TYPE_MAP[TYPE_TRUE]       = :true
  TYPE_MAP[TYPE_UCLASS]     = :user_class
  TYPE_MAP[TYPE_USERDEF]    = :user_defined
  TYPE_MAP[TYPE_USRMARSHAL] = :user_marshal

  ##
  # Creates a Tokenizer for a Marshal stream in String +stream+.
  #
  # The tokenizer reads from its own binary-encoded copy, so +stream+ is not
  # modified and may be frozen.

  def initialize stream
    # A copy keeps force_encoding from re-tagging (or failing on) the
    # caller's String.
    @stream     = stream.dup.force_encoding Encoding::BINARY
    @byte_array = @stream.bytes.to_a
    @consumed   = 2 # the two version bytes are read by #check_version
    @state      = [:any]
  end

  ##
  # Consumes one byte from the marshal stream and returns it as an Integer.
  #
  # Raises Marshal::Structure::EndOfMarshal when no bytes remain.

  def byte
    raise Marshal::Structure::EndOfMarshal.new(@consumed, 1) if
      @consumed >= @byte_array.size

    data = @byte_array[@consumed]
    @consumed += 1

    data
  end

  ##
  # Consumes +count+ bytes from the marshal stream as an Array of bytes

  def byte_array count
    bytes(count).bytes.to_a
  end

  ##
  # Consumes a sequence of bytes from the marshal stream based on the next
  # integer

  def byte_sequence
    size = long
    bytes size
  end

  ##
  # Consumes +count+ bytes from the marshal stream and returns them as a
  # String.
  #
  # Raises Marshal::Structure::Error for a negative +count+ and
  # Marshal::Structure::EndOfMarshal when fewer than +count+ bytes remain.

  def bytes count
    # A length decoded by #long can be negative in a malformed stream.
    # Slicing with it would return nil and move the read position backwards.
    raise Marshal::Structure::Error, "negative byte count #{count}" if
      count < 0

    raise Marshal::Structure::EndOfMarshal.new(@consumed, count) if
      @consumed + count > @stream.size

    data = @stream[@consumed, count]
    @consumed += count
    data
  end

  ##
  # Consumes one byte from the marshal stream and returns a character

  def character
    byte.chr
  end

  ##
  # Checks if the stream starts with a compatible marshal version
  #
  # Raises TypeError when the major version differs from MAJOR_VERSION or
  # the minor version is greater than MINOR_VERSION.

  def check_version
    major = @stream[0].ord
    minor = @stream[1].ord

    return if major == MAJOR_VERSION && minor <= MINOR_VERSION

    raise TypeError, "incompatible marshal file format (can't be read)\n\tformat version #{MAJOR_VERSION}.#{MINOR_VERSION} required; #{major}.#{minor} given"
  end

  ##
  # Decodes a stored C long
  #
  # The first byte selects the encoding: 0 is zero, a signed value beyond
  # +/-4 carries a small integer offset by 5, and a signed value of 1..4
  # (or -1..-4) gives the number of little-endian bytes that follow.

  def long
    c = byte

    return 0 if c == 0

    # convert to signed integer
    c = (c ^ 0x80) - 0x80

    if c > 0 then
      return c - 5 if 4 < c

      x = 0

      c.times do |i|
        x |= byte << (8 * i)
      end

      x
    else
      return c + 5 if c < -4

      # Start from all ones and overwrite the low bytes so the result stays
      # negative.
      x = -1

      (-c).times do |i|
        factor = 8 * i
        x &= ~(0xff << factor)
        x |= byte << factor
      end

      x
    end
  end

  ##
  # Attempts to retrieve the next token from the stream.  You may need to call
  # next_token twice to receive a token as the current token may be
  # incomplete.
  #
  # Pops one pending state and dispatches on it; raises
  # Marshal::Structure::Error for a state it does not know.

  def next_token # :nodoc:
    current_state = @state.pop

    case current_state
    when :any                         then tokenize_any
    when :array                       then tokenize_array
    when :bignum                      then tokenize_bignum
    when :byte                        then byte
    when :bytes,
         :class, :module, :module_old,
         :float, :string, :symbol     then byte_sequence
    when :data                        then tokenize_data
    when :extended                    then tokenize_extended
    when :fixnum, :link, :symbol_link then long
    when :hash, :pairs                then tokenize_pairs
    when :hash_default                then tokenize_hash_default
    when :instance_variables          then tokenize_instance_variables
    when :object                      then tokenize_object
    when :regexp                      then tokenize_regexp
    when :struct                      then tokenize_struct
    when :sym                         then tokenize_sym
    when :user_class                  then tokenize_user_class
    when :user_defined                then tokenize_user_defined
    when :user_marshal                then tokenize_user_marshal
    else
      raise Marshal::Structure::Error,
            "bug: unknown state #{current_state.inspect}"
    end
  end

  ##
  # Returns an Enumerator that will tokenize the Marshal stream.
  #
  # The version check runs immediately; tokenizing happens as the Enumerator
  # is consumed and stops once no pending states remain.

  def tokens
    check_version

    Enumerator.new do |yielder|
      until @state.empty? do
        token = next_token

        yielder << token if token
      end
    end
  end

  # Reads a type prefix and queues the state that reads its payload.
  # nil, true and false have no payload, so nothing is queued for them.
  def tokenize_any # :nodoc:
    item_type = TYPE_MAP[character]

    @state.push item_type unless [:nil, :true, :false].include? item_type

    item_type
  end

  # Reads the element count and queues one :any per element.
  def tokenize_array # :nodoc:
    size = long

    @state.concat Array.new(size, :any)

    size
  end

  # Reads a sign byte, a length, and the little-endian magnitude bytes, and
  # returns the decoded Integer.
  def tokenize_bignum # :nodoc:
    sign = byte == 45 ? -1 : 1 # 45 is "-"; any other sign byte is positive
    size = long * 2            # the stored length counts 2-byte units

    magnitude = 0

    byte_array(size).each_with_index do |octet, index|
      magnitude |= octet << (8 * index)
    end

    sign * magnitude
  end

  # Queues a symbol followed by one item, then returns the symbol's token.
  def tokenize_data # :nodoc:
    @state.push :any
    @state.push :sym

    next_token
  end

  alias tokenize_extended tokenize_data # :nodoc:

  # Reads the pair count, then queues one :any per key and value plus one
  # more for the default value that follows the pairs.
  def tokenize_hash_default # :nodoc:
    size = long

    @state.push :any # the default value, read after all pairs

    # Queue plain :any states as #tokenize_pairs does; #next_token has no
    # branch for an Integer counter state.
    @state.concat Array.new(size * 2, :any) if size > 0

    size
  end

  # Queues the wrapped item followed by its instance-variable pairs, then
  # returns the wrapped item's type token.
  def tokenize_instance_variables # :nodoc:
    @state.push :pairs
    @state.push :any

    next_token
  end

  ##
  # For multipart objects like arrays and hashes a count of items is pushed
  # onto the stack.  This method re-pushes an :any onto the stack until the
  # correct number of tokens have been created from the stream.
  #
  # NOTE(review): nothing in this class dispatches to this method.  As
  # written, a starting count of n reads n + 1 items (once each for n down
  # to 0) -- confirm the intended count before wiring it into #next_token.

  def tokenize_next_any current_state # :nodoc:
    next_state = current_state - 1
    @state.push next_state if current_state > 0
    @state.push :any

    next_token
  end

  # Queues the class-name symbol followed by the instance-variable pairs,
  # then returns the symbol's token.
  def tokenize_object # :nodoc:
    @state.push :pairs
    @state.push :sym

    next_token
  end

  # Reads the pair count and queues one :any for every key and every value.
  def tokenize_pairs # :nodoc:
    size = long

    @state.concat Array.new(size * 2, :any)

    size
  end

  # Returns the regexp source bytes and queues the single byte that follows
  # them.
  def tokenize_regexp # :nodoc:
    @state.push :byte

    byte_sequence
  end

  # Queues the struct-name symbol followed by the member pairs, then returns
  # the symbol's token.
  def tokenize_struct # :nodoc:
    @state.push :pairs
    @state.push :sym

    next_token
  end

  # Reads a type prefix that must be a symbol or a symbol link, and queues
  # the state that reads its payload.
  def tokenize_sym # :nodoc:
    item_type = TYPE_MAP[character]

    raise Marshal::Structure::Error,
          "expected symbol type, got #{item_type.inspect}" unless
      [:symbol, :symbol_link].include? item_type

    @state.push item_type

    item_type
  end

  alias tokenize_user_class tokenize_data # :nodoc:

  # Queues the class-name symbol followed by the dumped byte sequence, then
  # returns the symbol's token.
  def tokenize_user_defined # :nodoc:
    @state.push :bytes
    @state.push :sym

    next_token
  end

  alias tokenize_user_marshal tokenize_data # :nodoc:

end
449
+
@@ -1,170 +1,36 @@
1
- require 'minitest/autorun'
2
- require 'marshal/structure'
3
- require 'ben_string'
4
- require 'openssl'
5
- require 'pp'
1
+ require 'marshal/structure/test_case'
6
2
 
7
- class OpenSSL::X509::Name
8
- alias _dump_data to_a
3
+ class TestMarshalStructure < Marshal::Structure::TestCase
9
4
 
10
- def _load_data ary
11
- ary.each do |entry|
12
- add_entry(*entry)
5
+ def test_class_load
6
+ ary = %W[\x04 \x08 T]
7
+ def ary.getc
8
+ shift
13
9
  end
14
- end
15
- end
16
-
17
- class B; end
18
-
19
- module C; end
20
-
21
- module E; end
22
-
23
- class M
24
- def marshal_dump
25
- 'marshal_dump'
26
- end
27
-
28
- def marshal_load o
29
- end
30
- end
31
-
32
- class U
33
- def self._load str
34
- new
35
- end
36
-
37
- def _dump limit
38
- s = '_dump'
39
- s.instance_variable_set :@ivar_on_dump_str, 'value on ivar on dump str'
40
- s
41
- end
42
- end
43
-
44
- S = Struct.new :f
45
-
46
- class TestMarshalStructure < MiniTest::Unit::TestCase
47
10
 
48
- def mu_pp obj
49
- s = ''
50
- s = PP.pp obj, s
51
- s.chomp
52
- end
11
+ result = @MS.load ary
53
12
 
54
- def setup
55
- @MS = Marshal::Structure
13
+ assert_equal :true, result
56
14
  end
57
15
 
58
- def test_construct
59
- str =
60
- "\004\b{\006:\006a[\031c\006Bm\006C\"\006d/\006e\000i\006" \
61
- "f\0322.2999999999999998\000ff" \
62
- "l+\n\000\000\000\000\000\000\000\000\001\0000TF}\000i\000" \
63
- "S:\006S\006:\006fi\000o:\vObject\000@\017" \
64
- "U:\006M\"\021marshal_dump" \
65
- "Iu:\006U\n_dump\006" \
66
- ":\026@ivar_on_dump_str\"\036value on ivar on dump str" \
67
- ";\000e:\006Eo;\b\000" \
68
- "I\"\025string with ivar\006:\v@value\"\017some value" \
69
- "C:\016BenString\"\000"
70
-
71
- structure = @MS.load str
72
-
73
- expected = [
74
- :hash,
75
- 0,
76
- 1,
77
- [:symbol, 0, "a"],
78
- [:array,
79
- 1,
80
- 20,
81
- [:class, 2, "B"],
82
- [:module, 3, "C"],
83
- [:string, 4, "d"],
84
- [:regexp, 5, "e", 0],
85
- [:fixnum, 1],
86
- [:float, 6, "2.2999999999999998\000ff"],
87
- [:bignum, 7, 1, 10, 18446744073709551616],
88
- :nil,
89
- :true,
90
- :false,
91
- [:hash_default, 8, 0, [:fixnum, 0]],
92
- [:struct, 9, [:symbol, 1, "S"], 1, [:symbol, 2, "f"], [:fixnum, 0]],
93
- [:object, 10, [:symbol, 3, "Object"], [0]],
94
- [:link, 10],
95
- [:user_marshal, 11, [:symbol, 4, "M"], [:string, 12, "marshal_dump"]],
96
- [:instance_variables,
97
- [:user_defined, 13, [:symbol, 5, "U"], "_dump"],
98
- 1,
99
- [:symbol, 6, "@ivar_on_dump_str"],
100
- [:string, 14, "value on ivar on dump str"]],
101
- [:symbol_link, 0],
102
- [:extended, [:symbol, 7, "E"], [:object, 15, [:symbol_link, 3], [0]]],
103
- [:instance_variables,
104
- [:string, 16, "string with ivar"],
105
- 1,
106
- [:symbol, 8, "@value"],
107
- [:string, 17, "some value"]],
108
- [:user_class, [:symbol, 9, "BenString"], [:string, 18, ""]]]]
109
-
110
- assert_equal expected, structure
16
+ def test_count_allocations
17
+ assert_equal 1, @MS.new("\x04\x08[\x06T").count_allocations
111
18
  end
112
19
 
113
- def test_construct_data
114
- name = OpenSSL::X509::Name.parse 'CN=nobody/DC=example'
115
- str = Marshal.dump name
116
-
117
- expected = [
118
- :data,
119
- 0,
120
- [:symbol, 0, "OpenSSL::X509::Name"],
121
- [:array,
122
- 1,
123
- 2,
124
- [:array, 2, 3,
125
- [:string, 3, "CN"],
126
- [:string, 4, "nobody"],
127
- [:fixnum, 12]],
128
- [:array, 5, 3,
129
- [:string, 6, "DC"],
130
- [:string, 7, "example"],
131
- [:fixnum, 22]]]]
132
-
133
- assert_equal expected, @MS.load(str)
134
- end
135
-
136
- def test_construct_module_old
137
- assert_equal [:module, 0, "M"], @MS.load("\x04\x08M\x06M")
20
+ def test_load
21
+ assert_equal [true], @MS.new("\x04\x08[\x06T").load
138
22
  end
139
23
 
140
- def test_consume
141
- ms = @MS.new "\x04\x08\x06M"
142
-
143
- assert_equal "\x06M", ms.consume(2)
24
+ def test_structure
25
+ assert_equal [:array, 0, 1, :true], @MS.new("\x04\x08[\x06T").structure
144
26
  end
145
27
 
146
- def test_consume_bytes
147
- ms = @MS.new "\x04\x08\x06M"
148
-
149
- assert_equal [6, 77], ms.consume_bytes(2)
150
- end
151
-
152
- def test_consume_byte
153
- ms = @MS.new "\x04\x08M"
154
-
155
- assert_equal 77, ms.consume_byte
156
- end
157
-
158
- def test_consume_character
159
- ms = @MS.new "\x04\x08M"
160
-
161
- assert_equal 'M', ms.consume_character
162
- end
28
+ def test_token_stream
29
+ stream = @MS.new("\x04\x08[\x06T").token_stream
163
30
 
164
- def test_get_byte_sequence
165
- ms = @MS.new "\x04\x08\x06M"
31
+ assert_kind_of Enumerator, stream
166
32
 
167
- assert_equal "M", ms.get_byte_sequence
33
+ assert_equal [:array, 1, :true], stream.to_a
168
34
  end
169
35
 
170
36
  end