marshal-structure 1.1.1 → 2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +2 -1
- data/.autotest +0 -1
- data/History.txt +15 -0
- data/Manifest.txt +7 -0
- data/README.rdoc +16 -21
- data/Rakefile +1 -7
- data/lib/marshal/structure.rb +74 -542
- data/lib/marshal/structure/allocation_counter.rb +172 -0
- data/lib/marshal/structure/parser.rb +276 -0
- data/lib/marshal/structure/test_case.rb +95 -0
- data/lib/marshal/structure/tokenizer.rb +449 -0
- data/test/test_marshal_structure.rb +18 -152
- data/test/test_marshal_structure_allocation_counter.rb +163 -0
- data/test/test_marshal_structure_parser.rb +97 -0
- data/test/test_marshal_structure_tokenizer.rb +344 -0
- metadata +98 -90
- metadata.gz.sig +0 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
##
|
2
|
+
# Counts allocations necessary to load the stream. The number of allocations
|
3
|
+
# may be less as symbols (e.g. for object instance variables) may already
|
4
|
+
# exist.
|
5
|
+
#
|
6
|
+
# Allocation counts are determined as follows:
|
7
|
+
#
|
8
|
+
# * References to classes or modules are not counted. They either already
|
9
|
+
# exist or cause an ArgumentError upon load.
|
10
|
+
# * true, false, nil and Fixnum are not counted as they are all singletons.
|
11
|
+
# * Symbols count as one allocation even if they may already exist. (Marshal
|
12
|
+
# automatically compresses duplicate mentions of symbols and objects, so
|
13
|
+
# they will only be counted once).
|
14
|
+
# * Other objects are counted as one allocation.
|
15
|
+
|
16
|
+
class Marshal::Structure::AllocationCounter

  ##
  # Creates a new AllocationCounter for +tokens+.
  #
  # +tokens+ only needs to respond to #next, returning the next token of a
  # tokenized Marshal stream on each call.

  def initialize tokens
    @tokens = tokens
  end

  ##
  # Counts objects allocated from the stream.
  #
  # Consumes one complete value (including any nested values) from the token
  # stream and returns the number of allocations it requires.
  #
  # Raises ArgumentError if the token stream ends before the value is
  # complete and Marshal::Structure::Error if an unknown token is seen.

  def count
    token = @tokens.next

    case token
    when :nil, :true, :false          then 0
    when :array                       then count_array
    when :bignum                      then count_bignum
    when :class, :module, :module_old then count_class
    when :data                        then count_data
    when :extended                    then count_extended
    when :fixnum, :link, :symbol_link then @tokens.next; 0
    when :float                       then count_float
    when :hash                        then count_hash
    when :hash_default                then count_hash_default
    when :object                      then count_object
    when :regexp                      then count_regexp
    when :string                      then count_string
    when :struct                      then count_struct
    when :symbol                      then count_symbol
    when :user_class                  then count_extended
    when :user_defined                then count_user_defined
    when :user_marshal                then count_user_marshal
    when :instance_variables          then count + count_instance_variables
    else
      raise Marshal::Structure::Error, "bug: unknown token #{token.inspect}"
    end
  rescue Marshal::Structure::EndOfMarshal
    raise ArgumentError, 'marshal data too short'
  end

  # One allocation for the array plus whatever each element needs.
  def count_array # :nodoc:
    allocations = 1

    @tokens.next.times do
      allocations += count
    end

    allocations
  end

  # Skips the single payload token and counts one allocation.  Shared (via
  # alias) by every type whose body is exactly one token.
  def count_bignum # :nodoc:
    @tokens.next

    1
  end

  # Skips the class or module name; references to them are never counted.
  def count_class # :nodoc:
    @tokens.next

    0
  end

  # The class name is consumed but not counted; the wrapper object counts
  # as one allocation in addition to its dumped state.
  def count_data # :nodoc:
    get_symbol

    1 + count
  end

  # The module or class name is consumed but not counted; only the wrapped
  # object contributes allocations.
  def count_extended # :nodoc:
    get_symbol

    count
  end

  alias count_float count_bignum # :nodoc:

  # One allocation for the hash plus those of every key and value.
  def count_hash # :nodoc:
    allocations = 1

    @tokens.next.times do
      allocations += count
      allocations += count
    end

    allocations
  end

  # A hash followed by its default value.
  def count_hash_default # :nodoc:
    count_hash + count
  end

  # Counts the name symbol and the value of each instance variable.
  def count_instance_variables # :nodoc:
    allocations = 0

    @tokens.next.times do
      allocations += get_symbol
      allocations += count
    end

    allocations
  end

  # The instance is always one allocation.  The class name is consumed but
  # not counted, as in count_struct and count_data; using the name's count
  # in place of the object's would report zero allocations whenever the
  # class name is a symbol link (every instance of a class after the first).
  def count_object # :nodoc:
    get_symbol

    1 + count_instance_variables
  end

  # Skips the two payload tokens of a regexp and counts one allocation.
  def count_regexp # :nodoc:
    @tokens.next
    @tokens.next

    1
  end

  alias count_string count_bignum # :nodoc:

  # One allocation for the struct plus, for each member, its name symbol and
  # its value.  The struct's class name is consumed but not counted.
  def count_struct # :nodoc:
    allocations = 1

    get_symbol

    @tokens.next.times do
      allocations += get_symbol
      allocations += count
    end

    allocations
  end

  alias count_symbol count_bignum # :nodoc:

  # Counts the class name symbol plus one, then skips the dumped data token.
  #
  # NOTE(review): unlike count_object this still counts the class name
  # symbol; confirm whether the intended total is the loaded object plus its
  # data string before changing it.
  def count_user_defined # :nodoc:
    allocations = get_symbol + 1

    @tokens.next

    allocations
  end

  # The instance is always one allocation, in addition to the object handed
  # to marshal_load.  The class name is consumed but not counted so that a
  # symbol-linked class name does not hide the instance (see count_object).
  def count_user_marshal # :nodoc:
    get_symbol

    1 + count
  end

  # Consumes a symbol or symbol link from the stream.  Returns the number of
  # allocations it requires: one for a new symbol, zero for a link.
  #
  # Raises ArgumentError when the next token is neither.
  def get_symbol # :nodoc:
    token = @tokens.next

    case token
    when :symbol      then count_symbol
    when :symbol_link then @tokens.next; 0
    else
      raise ArgumentError, "expected SYMBOL or SYMLINK, got #{token.inspect}"
    end
  end

end
|
172
|
+
|
@@ -0,0 +1,276 @@
|
|
1
|
+
##
|
2
|
+
# Parses a tokenized Marshal stream into a structure that resembles how the
|
3
|
+
# stream would be loaded.
|
4
|
+
#
|
5
|
+
# Marshal can contain references to previous objects. These references are
|
6
|
+
# included in the structure following referenceable items. For example, a
|
7
|
+
# recursive array:
|
8
|
+
#
|
9
|
+
# a = []
|
10
|
+
# a << a
|
11
|
+
#
|
12
|
+
# Has the following Marshal stream:
|
13
|
+
#
|
14
|
+
# "\x04\x08[\x06@\x00" # @\x00 is a link to the first Object in the stream
|
15
|
+
#
|
16
|
+
# And has the following Marshal structure:
|
17
|
+
#
|
18
|
+
# [:array, 0, 1,
|
19
|
+
# [:link, 0]]
|
20
|
+
#
|
21
|
+
# The first item after +:array+, the +0+ is the object's stream ID. The
|
22
|
+
# +:link+ references this ID.
|
23
|
+
|
24
|
+
class Marshal::Structure::Parser

  ##
  # Creates a new Parser using a token stream Enumerator +tokens+.
  #
  # Object and symbol stream IDs are handed out starting at zero; the
  # counters begin at -1 because they are incremented before being returned.

  def initialize tokens
    @tokens  = tokens
    @objects = -1
    @symbols = -1
  end

  ##
  # Creates a new object reference

  def object_ref
    @objects += 1
  end

  ##
  # Creates the structure for the remaining stream.
  #
  # Returns the bare token for +:nil+, +:true+ and +:false+.  Every other
  # value is returned as an Array whose first item is the type token followed
  # by the type's body.
  #
  # Raises ArgumentError if the token stream ends before the value is
  # complete and Marshal::Structure::Error if an unknown token is seen.

  def parse
    token = @tokens.next

    return token if [:nil, :true, :false].include? token

    obj = [token]

    rest =
      case token
      when :array                       then parse_array
      when :bignum                      then parse_bignum
      when :class, :module              then parse_class
      when :data                        then parse_data
      when :extended                    then parse_extended
      when :fixnum, :link, :symbol_link then [@tokens.next]
      when :float                       then parse_float
      when :hash                        then parse_hash
      when :hash_default                then parse_hash_def
      when :object                      then parse_object
      when :regexp                      then parse_regexp
      when :string                      then parse_string
      when :struct                      then parse_struct
      when :symbol                      then parse_symbol
      when :user_class                  then parse_extended
      when :user_defined                then parse_user_defined
      when :user_marshal                then parse_user_marshal
      when :instance_variables          then
        [parse].concat parse_instance_variables
      when :module_old                  then
        # old-style modules are reported as plain modules
        obj[0] = :module
        parse_class
      else
        raise Marshal::Structure::Error, "bug: unknown token #{token.inspect}"
      end

    obj.concat rest
  rescue Marshal::Structure::EndOfMarshal
    raise ArgumentError, 'marshal data too short'
  end

  ##
  # Creates the body of an +:array+ object: the object reference, the item
  # count, then each item.

  def parse_array
    obj = [object_ref]

    items = @tokens.next

    obj << items

    items.times do
      obj << parse
    end

    obj
  end

  ##
  # Creates the body of a +:bignum+ object

  def parse_bignum
    result = @tokens.next

    [object_ref, result]
  end

  ##
  # Creates the body of a +:class+ object

  def parse_class
    [object_ref, @tokens.next]
  end

  ##
  # Creates the body of a wrapped C pointer object

  def parse_data
    [object_ref, get_symbol, parse]
  end

  ##
  # Creates the body of an extended object

  def parse_extended
    [get_symbol, parse]
  end

  ##
  # Creates the body of a +:float+ object

  def parse_float
    float = @tokens.next

    [object_ref, float]
  end

  ##
  # Creates the body of a +:hash+ object: the object reference, the pair
  # count, then each key followed by its value.

  def parse_hash
    obj = [object_ref]

    pairs = @tokens.next
    obj << pairs

    pairs.times do
      obj << parse
      obj << parse
    end

    obj
  end

  ##
  # Creates the body of a +:hash_default+ object: the complete +:hash+ body
  # followed by the default value.

  def parse_hash_def
    # Keep the whole hash body.  Destructuring it into two variables would
    # retain only the reference and pair count and drop every key and value.
    obj = parse_hash

    obj << parse
  end

  ##
  # Instance variables contain an object followed by a count of instance
  # variables and their contents

  def parse_instance_variables
    instance_variables = []

    pairs = @tokens.next
    instance_variables << pairs

    pairs.times do
      instance_variables << get_symbol
      instance_variables << parse
    end

    instance_variables
  end

  ##
  # Creates an Object

  def parse_object
    [object_ref, get_symbol, parse_instance_variables]
  end

  ##
  # Creates a Regexp

  def parse_regexp
    [object_ref, @tokens.next, @tokens.next]
  end

  ##
  # Creates a String

  def parse_string
    [object_ref, @tokens.next]
  end

  ##
  # Creates a Struct: the object reference, the struct name, the member
  # count, then each member name followed by its value.

  def parse_struct
    obj = [object_ref, get_symbol]

    members = @tokens.next
    obj << members

    members.times do
      obj << get_symbol
      obj << parse
    end

    obj
  end

  ##
  # Creates a Symbol

  def parse_symbol
    sym = @tokens.next

    [symbol_ref, sym]
  end

  ##
  # Creates an object saved by _dump

  def parse_user_defined
    name = get_symbol

    data = @tokens.next

    [object_ref, name, data]
  end

  ##
  # Creates an object saved by marshal_dump

  def parse_user_marshal
    name = get_symbol

    [object_ref, name, parse]
  end

  ##
  # Creates a new symbol reference

  def symbol_ref
    @symbols += 1
  end

  ##
  # Constructs a Symbol from the token stream
  #
  # Raises ArgumentError when the next token is neither a symbol nor a
  # symbol link.

  def get_symbol
    token = @tokens.next

    case token
    when :symbol then
      [:symbol, *parse_symbol]
    when :symbol_link then
      [:symbol_link, @tokens.next]
    else
      raise ArgumentError, "expected SYMBOL or SYMLINK, got #{token.inspect}"
    end
  end

end
|
276
|
+
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'marshal/structure'
|
3
|
+
require 'ben_string'
|
4
|
+
require 'openssl'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
# :stopdoc:
|
8
|
+
|
9
|
+
class OpenSSL::X509::Name
  # A Name is dumped as the array of entries returned by #to_a.
  alias _dump_data to_a

  # Restores a dumped Name by adding each entry of +ary+ back to the
  # receiver.  Returns +ary+.
  def _load_data ary
    ary.each { |entry| add_entry(*entry) }
  end
end
|
18
|
+
|
19
|
+
# Empty class, module and extension module used as test fixtures.
class B
end

module C
end

module E
end
|
24
|
+
|
25
|
+
# Fixture whose instances serialize through marshal_dump / marshal_load.
class M
  # Always dumps the same fixed string.
  def marshal_dump
    'marshal_dump'
  end

  # Accepts the dumped state and ignores it.
  def marshal_load(o); end
end
|
33
|
+
|
34
|
+
# Fixture whose instances serialize through _dump / _load.
class U
  class << self
    # Ignores the dumped string and returns a fresh instance.
    def _load str
      new
    end
  end

  # Returns the dump string, which itself carries an instance variable.
  def _dump limit
    '_dump'.tap do |dumped|
      dumped.instance_variable_set :@ivar_on_dump_str, 'value on ivar on dump str'
    end
  end
end
|
45
|
+
|
46
|
+
# Struct fixture with a single member, +f+.
S = Struct.new(:f)
|
47
|
+
|
48
|
+
# :startdoc:
|
49
|
+
|
50
|
+
##
|
51
|
+
# A TestCase for writing tests for Marshal::Structure and alternative parsers
|
52
|
+
# of Marshal streams.
|
53
|
+
|
54
|
+
class Marshal::Structure::TestCase < MiniTest::Unit::TestCase

  ##
  # A Marshal stream with (almost) every type in it.  The notable absence is
  # of a Data type.

  EVERYTHING =
    "\004\b{\006:\006a[\031c\006Bm\006C\"\006d/\006e\000i\006" \
    "f\0322.2999999999999998\000ff" \
    "l+\n\000\000\000\000\000\000\000\000\001\0000TF}\000i\000" \
    "S:\006S\006:\006fi\000o:\vObject\000@\017" \
    "U:\006M\"\021marshal_dump" \
    "Iu:\006U\n_dump\006" \
    ":\026@ivar_on_dump_str\"\036value on ivar on dump str" \
    ";\000e:\006Eo;\b\000" \
    "I\"\025string with ivar\006:\v@value\"\017some value" \
    "C:\016BenString\"\000"

  ##
  # Pretty-print minitest diff output
  #
  # Returns the PP rendering of +obj+ without its trailing newline.

  def mu_pp obj # :nodoc:
    # PP appends to the buffer it is given, so hand it an unfrozen copy
    # rather than a string literal.
    PP.pp(obj, ''.dup).chomp
  end

  ##
  # Creates the following convenience namespace instance variables:
  #
  # @MS::  Marshal::Structure
  # @MSP:: Marshal::Structure::Parser
  # @MST:: Marshal::Structure::Tokenizer

  def setup
    @MS  = Marshal::Structure
    @MSP = Marshal::Structure::Parser
    @MST = Marshal::Structure::Tokenizer
  end

end
|
95
|
+
|