marshal-structure 1.1.1 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +2 -1
- data/.autotest +0 -1
- data/History.txt +15 -0
- data/Manifest.txt +7 -0
- data/README.rdoc +16 -21
- data/Rakefile +1 -7
- data/lib/marshal/structure.rb +74 -542
- data/lib/marshal/structure/allocation_counter.rb +172 -0
- data/lib/marshal/structure/parser.rb +276 -0
- data/lib/marshal/structure/test_case.rb +95 -0
- data/lib/marshal/structure/tokenizer.rb +449 -0
- data/test/test_marshal_structure.rb +18 -152
- data/test/test_marshal_structure_allocation_counter.rb +163 -0
- data/test/test_marshal_structure_parser.rb +97 -0
- data/test/test_marshal_structure_tokenizer.rb +344 -0
- metadata +98 -90
- metadata.gz.sig +0 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
##
|
2
|
+
# Counts allocations necessary to load the stream. The number of allocations
|
3
|
+
# may be less as symbols (e.g. for object instance variables) may already
|
4
|
+
# exist.
|
5
|
+
#
|
6
|
+
# Allocation counts are determined as follows:
|
7
|
+
#
|
8
|
+
# * References to classes or modules are not counted. They either already
|
9
|
+
# exist or cause an ArgumentError upon load.
|
10
|
+
# * true, false, nil and Fixnum are not counted as they are all singletons.
|
11
|
+
# * Symbols count as one allocation even if they may already exist. (Marshal
|
12
|
+
# automatically compresses duplicate mentions of symbols and objects, so
|
13
|
+
# they will only be counted once).
|
14
|
+
# * Other objects are counted as one allocation.
|
15
|
+
|
16
|
+
class Marshal::Structure::AllocationCounter

  ##
  # Creates a new AllocationCounter that reads from the token stream
  # +tokens+.  Only #next is ever called on +tokens+.

  def initialize tokens
    @tokens = tokens
  end

  ##
  # Counts objects allocated from the stream.
  #
  # Reads one item (and everything nested inside it) from the token stream
  # and returns the number of allocations as an Integer.
  #
  # Raises ArgumentError when the token stream ends before the item is
  # complete and Marshal::Structure::Error for an unknown token.

  def count
    token = @tokens.next

    case token
    when :nil, :true, :false
      0
    when :fixnum, :link, :symbol_link
      @tokens.next # skip the value or the referenced ID
      0
    when :class, :module, :module_old
      count_class
    when :extended, :user_class
      count_extended
    when :instance_variables
      # the wrapped object comes first, its instance variables follow
      count + count_instance_variables
    when :array        then count_array
    when :bignum       then count_bignum
    when :data         then count_data
    when :float        then count_float
    when :hash         then count_hash
    when :hash_default then count_hash_default
    when :object       then count_object
    when :regexp       then count_regexp
    when :string       then count_string
    when :struct       then count_struct
    when :symbol       then count_symbol
    when :user_defined then count_user_defined
    when :user_marshal then count_user_marshal
    else
      raise Marshal::Structure::Error, "bug: unknown token #{token.inspect}"
    end
  rescue Marshal::Structure::EndOfMarshal
    raise ArgumentError, 'marshal data too short'
  end

  # One allocation for the array plus the allocations of each item.  The
  # next token is the item count.
  def count_array # :nodoc:
    @tokens.next.times.inject 1 do |allocations, _|
      allocations + count
    end
  end

  # Skips the single value token and reports one allocation.
  def count_bignum # :nodoc:
    @tokens.next

    1
  end

  # Skips the name token; references to classes and modules are not counted.
  def count_class # :nodoc:
    @tokens.next

    0
  end

  # One allocation for the data object plus the allocations of its content.
  # The result of reading the class name symbol is discarded.
  def count_data # :nodoc:
    get_symbol

    count + 1
  end

  # Reads the name symbol (its result is discarded) and counts the object
  # that follows.
  def count_extended # :nodoc:
    get_symbol

    count
  end

  alias_method :count_float, :count_bignum # :nodoc:

  # One allocation for the hash plus the allocations of every key and value.
  # The next token is the pair count.
  def count_hash # :nodoc:
    @tokens.next.times.inject 1 do |allocations, _|
      allocations + count + count
    end
  end

  # A hash body followed by its default value.
  def count_hash_default # :nodoc:
    hash_allocations = count_hash

    hash_allocations + count
  end

  # Allocations of each instance variable name and value.  The next token is
  # the pair count.
  def count_instance_variables # :nodoc:
    @tokens.next.times.inject 0 do |allocations, _|
      allocations + get_symbol + count
    end
  end

  # Allocations of the class name symbol plus those of the instance
  # variables.
  #
  # NOTE(review): nothing is added for the object itself here, while the
  # class documentation says other objects count as one allocation -- confirm
  # this is intended.
  def count_object # :nodoc:
    name_allocations = get_symbol

    name_allocations + count_instance_variables
  end

  # Skips the two tokens that follow a regexp and reports one allocation.
  def count_regexp # :nodoc:
    2.times { @tokens.next }

    1
  end

  alias_method :count_string, :count_bignum # :nodoc:

  # One allocation for the struct plus the allocations of each member name
  # and value.  The result of reading the struct name symbol is discarded.
  def count_struct # :nodoc:
    get_symbol

    @tokens.next.times.inject 1 do |allocations, _|
      allocations + get_symbol + count
    end
  end

  alias_method :count_symbol, :count_bignum

  # Allocations of the class name symbol plus one for the object.  The token
  # that follows the name is skipped.
  def count_user_defined # :nodoc:
    name_allocations = get_symbol

    @tokens.next

    name_allocations + 1
  end

  # Allocations of the class name symbol plus those of the dumped object.
  #
  # NOTE(review): as in #count_object nothing is added for the object itself
  # -- confirm this is intended.
  def count_user_marshal # :nodoc:
    name_allocations = get_symbol

    name_allocations + count
  end

  # Reads a symbol or symbol link from the stream.  Returns the allocation
  # count of a new symbol or 0 for a link.  Raises ArgumentError for any
  # other token.
  def get_symbol # :nodoc:
    token = @tokens.next

    return count_symbol if :symbol == token

    unless :symbol_link == token then
      raise ArgumentError, "expected SYMBOL or SYMLINK, got #{token.inspect}"
    end

    @tokens.next

    0
  end

end
|
172
|
+
|
@@ -0,0 +1,276 @@
|
|
1
|
+
##
|
2
|
+
# Parses a tokenized Marshal stream into a structure that resembles how the
|
3
|
+
# stream would be loaded.
|
4
|
+
#
|
5
|
+
# Marshal can contain references to previous objects. These references are
|
6
|
+
# included in the structure following referenceable items. For example, a
|
7
|
+
# recursive array:
|
8
|
+
#
|
9
|
+
# a = []
|
10
|
+
#   a << a
|
11
|
+
#
|
12
|
+
# Has the following Marshal stream:
|
13
|
+
#
|
14
|
+
# "\x04\x08[\x06@\x00" # @\x00 is a link to the first Object in the stream
|
15
|
+
#
|
16
|
+
# And has the following Marshal structure:
|
17
|
+
#
|
18
|
+
# [:array, 0, 1,
|
19
|
+
# [:link, 0]]
|
20
|
+
#
|
21
|
+
# The first item after +:array+, the +0+ is the object's stream ID. The
|
22
|
+
# +:link+ references this ID.
|
23
|
+
|
24
|
+
class Marshal::Structure::Parser

  ##
  # Creates a new Parser using a token stream Enumerator +tokens+.
  #
  # The object and symbol counters start at -1 so the first stream ID handed
  # out by #object_ref or #symbol_ref is 0.

  def initialize tokens
    @tokens  = tokens
    @objects = -1
    @symbols = -1
  end

  ##
  # Creates a new object reference and returns its stream ID.

  def object_ref
    @objects += 1
  end

  ##
  # Creates the structure for the remaining stream.
  #
  # Returns the bare token for +:nil+, +:true+ and +:false+.  Every other
  # item is returned as an Array that starts with the item's type token
  # followed by the body built by the matching parse_* method.  A
  # +:module_old+ token is reported as +:module+.
  #
  # Raises ArgumentError when the token stream ends before the structure is
  # complete and Marshal::Structure::Error for an unknown token.

  def parse
    token = @tokens.next

    return token if [:nil, :true, :false].include? token

    obj = [token]

    rest =
      case token
      when :array                       then parse_array
      when :bignum                      then parse_bignum
      when :class, :module              then parse_class
      when :data                        then parse_data
      when :extended                    then parse_extended
      when :fixnum, :link, :symbol_link then [@tokens.next]
      when :float                       then parse_float
      when :hash                        then parse_hash
      when :hash_default                then parse_hash_def
      when :object                      then parse_object
      when :regexp                      then parse_regexp
      when :string                      then parse_string
      when :struct                      then parse_struct
      when :symbol                      then parse_symbol
      when :user_class                  then parse_extended
      when :user_defined                then parse_user_defined
      when :user_marshal                then parse_user_marshal
      when :instance_variables          then
        # the wrapped object comes first, its instance variables follow
        [parse].concat parse_instance_variables
      when :module_old                  then
        obj[0] = :module
        parse_class
      else
        raise Marshal::Structure::Error, "bug: unknown token #{token.inspect}"
      end

    obj.concat rest
  rescue Marshal::Structure::EndOfMarshal
    raise ArgumentError, 'marshal data too short'
  end

  ##
  # Creates the body of an +:array+ object: the object reference, the item
  # count and the structure of each item.

  def parse_array
    obj = [object_ref]

    items = @tokens.next

    obj << items

    items.times do
      obj << parse
    end

    obj
  end

  ##
  # Creates the body of a +:bignum+ object: the object reference and the
  # value token.

  def parse_bignum
    result = @tokens.next

    [object_ref, result]
  end

  ##
  # Creates the body of a +:class+ object: the object reference and the name
  # token.  Also used for +:module+ and +:module_old+.

  def parse_class
    [object_ref, @tokens.next]
  end

  ##
  # Creates the body of a wrapped C pointer object: the object reference, the
  # class name symbol and the structure of the dumped data.

  def parse_data
    [object_ref, get_symbol, parse]
  end

  ##
  # Creates the body of an extended object: the name symbol and the structure
  # of the object that follows.  Also used for +:user_class+.  No object
  # reference is created here.

  def parse_extended
    [get_symbol, parse]
  end

  ##
  # Creates the body of a +:float+ object: the object reference and the value
  # token.

  def parse_float
    float = @tokens.next

    [object_ref, float]
  end

  ##
  # Creates the body of a +:hash+ object: the object reference, the pair
  # count and the structure of each key and value in stream order.

  def parse_hash
    obj = [object_ref]

    pairs = @tokens.next
    obj << pairs

    pairs.times do
      obj << parse
      obj << parse
    end

    obj
  end

  ##
  # Creates the body of a +:hash_default+ object.
  #
  # The body is the same as for a +:hash+ (object reference, pair count, then
  # each key and value) followed by the structure of the default value.

  def parse_hash_def
    # Append to the complete hash body.  Destructuring the flat array that
    # #parse_hash returns would keep only the pair count and lose every key
    # and value.
    parse_hash << parse
  end

  ##
  # Instance variables contain an object followed by a count of instance
  # variables and their contents.
  #
  # Returns the pair count followed by each name symbol and value structure.
  # The object itself is read by the caller.

  def parse_instance_variables
    instance_variables = []

    pairs = @tokens.next
    instance_variables << pairs

    pairs.times do
      instance_variables << get_symbol
      instance_variables << parse
    end

    instance_variables
  end

  ##
  # Creates an Object: the object reference, the class name symbol and the
  # instance variables as a nested Array.

  def parse_object
    [object_ref, get_symbol, parse_instance_variables]
  end

  ##
  # Creates a Regexp: the object reference followed by the next two tokens.

  def parse_regexp
    [object_ref, @tokens.next, @tokens.next]
  end

  ##
  # Creates a String: the object reference and the value token.

  def parse_string
    [object_ref, @tokens.next]
  end

  ##
  # Creates a Struct: the object reference, the struct name symbol, the
  # member count and each member name symbol and value structure.

  def parse_struct
    obj = [object_ref, get_symbol]

    members = @tokens.next
    obj << members

    members.times do
      obj << get_symbol
      obj << parse
    end

    obj
  end

  ##
  # Creates a Symbol: the symbol reference and the name token.

  def parse_symbol
    sym = @tokens.next

    [symbol_ref, sym]
  end

  ##
  # Creates an object saved by _dump: the object reference, the class name
  # symbol and the data token.

  def parse_user_defined
    name = get_symbol

    data = @tokens.next

    [object_ref, name, data]
  end

  ##
  # Creates an object saved by marshal_dump: the object reference, the class
  # name symbol and the structure of the dumped object.

  def parse_user_marshal
    name = get_symbol

    [object_ref, name, parse]
  end

  ##
  # Creates a new symbol reference and returns its stream ID.

  def symbol_ref
    @symbols += 1
  end

  ##
  # Constructs a Symbol from the token stream.
  #
  # Returns <tt>[:symbol, ref, name]</tt> for a new symbol or
  # <tt>[:symbol_link, id]</tt> for a reference to an earlier one.  Raises
  # ArgumentError for any other token.

  def get_symbol
    token = @tokens.next

    case token
    when :symbol then
      [:symbol, *parse_symbol]
    when :symbol_link then
      [:symbol_link, @tokens.next]
    else
      raise ArgumentError, "expected SYMBOL or SYMLINK, got #{token.inspect}"
    end
  end

end
|
276
|
+
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'marshal/structure'
|
3
|
+
require 'ben_string'
|
4
|
+
require 'openssl'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
# :stopdoc:
|
8
|
+
|
9
|
+
# Reopens OpenSSL::X509::Name to add _dump_data and _load_data.
class OpenSSL::X509::Name
  # _dump_data is the same method as #to_a.
  alias_method :_dump_data, :to_a

  # Adds each entry of +ary+ to this name by splatting it into #add_entry.
  # Returns +ary+.
  def _load_data ary
    ary.each { |fields| add_entry(*fields) }
  end
end
|
18
|
+
|
19
|
+
# Empty class with no behavior of its own.
class B
end
|
20
|
+
|
21
|
+
# Empty module with no behavior of its own.
module C
end
|
22
|
+
|
23
|
+
# Empty module with no behavior of its own.
module E
end
|
24
|
+
|
25
|
+
# Class that implements marshal_dump and marshal_load.
class M
  # Always dumps the String 'marshal_dump'.
  def marshal_dump
    'marshal_dump'
  end

  # Ignores the loaded object +o+ and returns nil.
  def marshal_load o
    nil
  end
end
|
33
|
+
|
34
|
+
# Class that implements _dump and _load.
class U
  class << self
    # Ignores +str+ and returns a new instance.
    def _load str
      new
    end
  end

  # Ignores +limit+ and returns the String '_dump' carrying the instance
  # variable @ivar_on_dump_str.
  def _dump limit
    '_dump'.tap do |dumped|
      dumped.instance_variable_set :@ivar_on_dump_str, 'value on ivar on dump str'
    end
  end
end
|
45
|
+
|
46
|
+
# Struct with the single member +f+.
S = Struct.new(:f)
|
47
|
+
|
48
|
+
# :startdoc:
|
49
|
+
|
50
|
+
##
|
51
|
+
# A TestCase for writing tests for Marshal::Structure and alternative parsers
|
52
|
+
# of Marshal streams.
|
53
|
+
|
54
|
+
class Marshal::Structure::TestCase < MiniTest::Unit::TestCase

  ##
  # A Marshal stream with (almost) every type in it.  The notable absence is
  # of a Data type.

  EVERYTHING =
    "\004\b{\006:\006a[\031c\006Bm\006C\"\006d/\006e\000i\006" \
    "f\0322.2999999999999998\000ff" \
    "l+\n\000\000\000\000\000\000\000\000\001\0000TF}\000i\000" \
    "S:\006S\006:\006fi\000o:\vObject\000@\017" \
    "U:\006M\"\021marshal_dump" \
    "Iu:\006U\n_dump\006" \
    ":\026@ivar_on_dump_str\"\036value on ivar on dump str" \
    ";\000e:\006Eo;\b\000" \
    "I\"\025string with ivar\006:\v@value\"\017some value" \
    "C:\016BenString\"\000"

  ##
  # Pretty-print minitest diff output.
  #
  # Returns the PP output for +obj+ without the trailing newline.

  def mu_pp obj # :nodoc:
    # PP.pp appends to the given buffer, so start from a fresh String
    PP.pp(obj, ''.dup).chomp
  end

  ##
  # Creates the following convenience namespace instance variables:
  #
  # @MS::  Marshal::Structure
  # @MSP:: Marshal::Structure::Parser
  # @MST:: Marshal::Structure::Tokenizer

  def setup
    @MS  = Marshal::Structure
    @MSP = Marshal::Structure::Parser
    @MST = Marshal::Structure::Tokenizer
  end

end
|
95
|
+
|