marshal-parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
module MarshalParser
  module Formatters
    module AST
      # Formats a parsed node tree as a verbose S-expression: every node
      # becomes "(node-name ...)" and every token that has an entry in the
      # node's attributes becomes a nested "(attribute-name value)" line.
      class SExpression
        # node          - root node to format
        # source_string - the string that token index/length pairs refer to
        # renderer      - object responding to #render(entries_block)
        def initialize(node, source_string, renderer)
          @node = node
          @source_string = source_string
          @renderer = renderer
        end

        # Returns whatever the renderer produces for the root entries block.
        def string
          @renderer.render(Renderers::EntriesBlock.new(node_to_entries(@node)))
        end

        private

        # Builds the entries for +node+: a header line followed (when the
        # node has any children) by a single block holding all child entries.
        def node_to_entries(node)
          children = []

          node.child_entities.each do |entity|
            if entity.is_a?(Lexer::Token) && node.attributes.key?(entity)
              children << attribute_line(node, entity)
            elsif entity.is_a?(Parser::Node)
              children.concat(node_to_entries(entity).flatten)
            end
          end

          entries = [Renderers::Line.new("(#{node_to_name(node)}"), *children]
          # The closing bracket goes on the very last line of the subtree.
          close_bracket(entries.last)

          raise "Expected 1st entry to be Line" unless entries.first.is_a?(Renderers::Line)

          if node.is_a?(Parser::Annotatable)
            entries[0] = Renderers::LineAnnotated.new(entries.first.string, node.annotation)
          end

          header, *rest = entries
          rest.empty? ? entries : [header, Renderers::EntriesBlock.new(rest)]
        end

        # Renders one attributed token as "(name value)". When the attribute
        # value is itself a token, the token's source slice is shown escaped.
        def attribute_line(node, token)
          options = node.attributes[token]
          label = options[:name].to_s.tr("_", "-")
          value = options[:value]
          value = @source_string[token.index, token.length].dump if value.is_a?(Lexer::Token)

          Renderers::Line.new("(#{label} #{value})")
        end

        # MarshalParser::Parser::ObjectWithMarshalDumpMethod -> object-with-marshal-dump-method
        def node_to_name(node)
          short_name = node.class.name.to_s.split("::").last
          without_suffix = short_name.sub(/Node\Z/, "")
          without_suffix.gsub(/([a-z])([A-Z])/, '\1-\2').downcase
        end

        # Appends ")" to the last line reachable from +entry+, descending
        # through nested blocks. Does nothing for any other kind of entry.
        def close_bracket(entry)
          entry = entry.entries.last while !entry.is_a?(Renderers::Line) && entry.is_a?(Renderers::EntriesBlock)
          entry.string << ")" if entry.is_a?(Renderers::Line)
        end
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
module MarshalParser
  module Formatters
    module AST
      # Formats a parsed node tree as a compact S-expression: only child
      # nodes and each node's literal token are shown, and a node whose only
      # child is its literal is collapsed onto a single line.
      class SExpressionCompact
        # node          - root node to format
        # source_string - the string that token index/length pairs refer to
        # renderer      - object responding to #render(entries_block)
        def initialize(node, source_string, renderer)
          @node = node
          @source_string = source_string
          @renderer = renderer
        end

        # Returns whatever the renderer produces for the root entries block.
        def string
          @renderer.render(Renderers::EntriesBlock.new(node_to_entries(@node)))
        end

        private

        # Builds the entries for +node+: a header line followed (when more
        # than one entry remains) by a single block holding the rest.
        def node_to_entries(node)
          children = []

          node.child_entities.each do |entity|
            if entity.is_a?(Parser::Node)
              children.concat(node_to_entries(entity).flatten)
            elsif entity == node.literal_token
              children << literal_line(node)
            end
          end

          entries = [Renderers::Line.new(node_to_name(node)), *children]

          # "name" + single literal line -> one "name literal" line.
          if node.literal_token && (entries.size == 2 && entries.all?(Renderers::Line))
            entries = [Renderers::Line.new(entries.map(&:string).join(" "))]
          end

          # Nodes reporting always_leaf? are printed without brackets.
          unless node.always_leaf?
            entries[0] = Renderers::Line.new("(#{entries.first.string}")
            close_bracket(entries.last)
          end

          raise "Expected 1st entry to be Line" unless entries.first.is_a?(Renderers::Line)

          if node.is_a?(Parser::Annotatable)
            entries[0] = Renderers::LineAnnotated.new(entries.first.string, node.annotation)
          end

          header, *rest = entries
          rest.empty? ? entries : [header, Renderers::EntriesBlock.new(rest)]
        end

        # Renders the node's literal: the escaped source slice when the
        # attribute value is a token, otherwise the value's #to_s.
        def literal_line(node)
          value = node.attributes[node.literal_token][:value]

          content =
            if value.is_a?(Lexer::Token)
              @source_string[value.index, value.length].dump
            else
              value.to_s
            end

          Renderers::Line.new(content)
        end

        # MarshalParser::Parser::ObjectWithMarshalDumpMethod -> object-with-marshal-dump-method
        def node_to_name(node)
          short_name = node.class.name.to_s.split("::").last
          without_suffix = short_name.sub(/Node\Z/, "")
          without_suffix.gsub(/([a-z])([A-Z])/, '\1-\2').downcase
        end

        # Appends ")" to the last line reachable from +entry+, descending
        # through nested blocks. Does nothing for any other kind of entry.
        def close_bracket(entry)
          entry = entry.entries.last while !entry.is_a?(Renderers::Line) && entry.is_a?(Renderers::EntriesBlock)
          entry.string << ")" if entry.is_a?(Renderers::Line)
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module MarshalParser
  module Formatters
    module Symbols
      # Renders a list of symbols as a numbered table, one symbol per line.
      class Table
        # symbols - enumerable of symbols (or anything printable with %s)
        def initialize(symbols)
          @symbols = symbols
        end

        # Returns lines of the form "<position> - :<symbol>" joined with
        # newlines; the position is left-aligned in a 4-character column.
        def string
          rows = @symbols.each_with_index.map do |symbol, position|
            format("%-4d - :%s", position, symbol)
          end

          rows.join("\n")
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module MarshalParser
  module Formatters
    module Tokens
      # Renders all tokens on one line, separated by single spaces.
      class OneLine
        NON_PRINTABLE = /[^[:print:]]/.freeze
        private_constant :NON_PRINTABLE

        # tokens        - objects responding to #index and #length
        # source_string - the string those index/length pairs refer to
        def initialize(tokens, source_string)
          @tokens = tokens
          @source_string = source_string
        end

        # Returns each token's source slice; slices containing a
        # non-printable character are shown escaped (String#dump).
        def string
          pieces = @tokens.map do |token|
            slice = @source_string[token.index, token.length]
            NON_PRINTABLE.match?(slice) ? slice.dump : slice
          end

          pieces.join(" ")
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module MarshalParser
  module Formatters
    module Tokens
      # Renders one line per token: the escaped source slice, a human
      # readable description of the token kind and, when the token carries
      # one, its decoded value in parentheses.
      class WithDescription
        # tokens        - objects responding to #id, #index, #length, #value
        # source_string - the string those index/length pairs refer to
        def initialize(tokens, source_string)
          @tokens = tokens
          @source_string = source_string
        end

        # Returns the lines joined with newlines. The escaped slice is
        # left-aligned in a 10-character column.
        def string
          lines = @tokens.map do |token|
            slice = @source_string[token.index, token.length].dump
            description = self.class.token_description(token.id)
            value = token.value ? " (#{token.value})" : ""

            format("%-10s - %s%s", slice, description, value)
          end

          lines.join("\n")
        end

        # Maps a token id (one of the Lexer constants) to its description.
        # Returns nil for an id that matches none of them.
        def self.token_description(token)
          case token
          when Lexer::VERSION then "Version"
          when Lexer::ARRAY_PREFIX then "Array beginning"
          when Lexer::OBJECT_WITH_IVARS_PREFIX then "Special object with instance variables"
          when Lexer::OBJECT_WITH_DUMP_PREFIX then "Object with #_dump and .load"
          when Lexer::OBJECT_WITH_MARSHAL_DUMP_PREFIX then "Object with #marshal_dump and #marshal_load"
          when Lexer::STRING_PREFIX then "String beginning"
          when Lexer::HASH_PREFIX then "Hash beginning"
          when Lexer::HASH_WITH_DEFAULT_VALUE_PREFIX then "Hash beginning (with default value)"
          when Lexer::REGEXP_PREFIX then "Regexp beginning"
          when Lexer::STRUCT_PREFIX then "Struct beginning"
          when Lexer::TRUE then "true"
          when Lexer::FALSE then "false"
          when Lexer::NIL then "nil"
          when Lexer::FLOAT_PREFIX then "Float beginning"
          when Lexer::INTEGER_PREFIX then "Integer beginning"
          when Lexer::BIG_INTEGER_PREFIX then "Big Integer beginning"
          when Lexer::SYMBOL_PREFIX then "Symbol beginning"
          when Lexer::SYMBOL_LINK_PREFIX then "Link to Symbol"
          when Lexer::CLASS_PREFIX then "Class beginning"
          when Lexer::MODULE_PREFIX then "Module beginning"
          when Lexer::OBJECT_PREFIX then "Object beginning"
          when Lexer::OBJECT_LINK_PREFIX then "Link to object"
          when Lexer::OBJECT_EXTENDED_PREFIX then "Object extended with a module"
          when Lexer::SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX then "Instance of a Core Library class subclass beginning"
          when Lexer::FLOAT then "Float string representation"
          when Lexer::INTEGER then "Integer encoded"
          when Lexer::BIG_INTEGER then "Big Integer encoded"
          when Lexer::STRING then "String characters"
          when Lexer::SYMBOL then "Symbol characters"
          when Lexer::PLUS_SIGN then "Sign '+'"
          when Lexer::MINUS_SIGN then "Sign '-'"
          when Lexer::UNKNOWN_SIGN then "Unknown sign (internal error)"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,331 @@
1
+ # frozen_string_literal: true
2
+
3
module MarshalParser
  # Splits a Marshal dump into a flat list of tokens.
  #
  # Each token records its kind (+id+), its position in the source string
  # (+index+, +length+) and, for some kinds, a decoded +value+.
  # Positions are obtained with String#[], so the source string is expected
  # to be binary (as returned by Marshal.dump).
  class Lexer
    # assign values 0, 1, 2, ...
    VERSION,
    ARRAY_PREFIX,
    OBJECT_WITH_IVARS_PREFIX,
    OBJECT_WITH_DUMP_PREFIX,
    OBJECT_WITH_MARSHAL_DUMP_PREFIX,
    STRING_PREFIX,
    HASH_PREFIX,
    HASH_WITH_DEFAULT_VALUE_PREFIX,
    REGEXP_PREFIX,
    STRUCT_PREFIX,
    TRUE,
    FALSE,
    NIL,
    FLOAT_PREFIX,
    INTEGER_PREFIX,
    BIG_INTEGER_PREFIX,
    SYMBOL_PREFIX,
    SYMBOL_LINK_PREFIX,
    CLASS_PREFIX,
    MODULE_PREFIX,
    OBJECT_PREFIX,
    OBJECT_LINK_PREFIX,
    OBJECT_EXTENDED_PREFIX,
    SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX,
    FLOAT,
    INTEGER,
    BIG_INTEGER,
    STRING,
    SYMBOL,
    PLUS_SIGN,
    MINUS_SIGN,
    UNKNOWN_SIGN = (0..100).to_a

    Token = Struct.new(:id, :index, :length, :value)

    # Type character => [token id, private method that reads the payload].
    # A nil reader means the type character is the whole value.
    READERS = {
      "[" => [ARRAY_PREFIX, :read_array],
      "I" => [OBJECT_WITH_IVARS_PREFIX, :read_object_with_instance_variables],
      '"' => [STRING_PREFIX, :read_string],
      "{" => [HASH_PREFIX, :read_hash],
      "}" => [HASH_WITH_DEFAULT_VALUE_PREFIX, :read_hash_with_default_value],
      "/" => [REGEXP_PREFIX, :read_regexp],
      "S" => [STRUCT_PREFIX, :read_struct],
      "T" => [TRUE, nil],
      "F" => [FALSE, nil],
      "0" => [NIL, nil],
      ":" => [SYMBOL_PREFIX, :read_symbol],
      ";" => [SYMBOL_LINK_PREFIX, :read_symbol_link],
      "f" => [FLOAT_PREFIX, :read_float],
      "i" => [INTEGER_PREFIX, :read_integer],
      "l" => [BIG_INTEGER_PREFIX, :read_big_integer],
      "c" => [CLASS_PREFIX, :read_class],
      "m" => [MODULE_PREFIX, :read_module],
      "C" => [SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX, :read_object_of_subclass_of_core_library_class],
      "o" => [OBJECT_PREFIX, :read_object],
      "@" => [OBJECT_LINK_PREFIX, :read_integer],
      "e" => [OBJECT_EXTENDED_PREFIX, :read_object_extended],
      "u" => [OBJECT_WITH_DUMP_PREFIX, :read_object_with_dump],
      "U" => [OBJECT_WITH_MARSHAL_DUMP_PREFIX, :read_object_with_marshal_dump]
    }.freeze
    private_constant :READERS

    attr_reader :tokens

    # source_string - the Marshal dump to tokenize
    def initialize(source_string)
      @dump = source_string
      @tokens = []
    end

    # Tokenizes the dump from the start (version header plus one object).
    # Any tokens from a previous run are discarded.
    #
    # Returns the token list (also available via #tokens).
    # Raises RuntimeError on an unknown type character or truncated input.
    def run
      @index = 0
      @tokens = []

      read_version
      read

      @tokens
    end

    # Returns the string passed to the constructor.
    def source_string
      @dump
    end

    private

    # Reads the 2-byte version header; the token value is "major.minor".
    def read_version
      version = @dump[@index, 2]
      version_unpacked = version.unpack("CC").join(".")
      @tokens << Token.new(VERSION, @index, 2, version_unpacked)
      @index += 2
    end

    # Reads one complete object: its type character, then its payload.
    def read
      c = @dump[@index]
      raise_unexpected_end if c.nil?

      id, reader = READERS[c]
      raise "Unexpected character #{c.dump} (index=#{@index})" if id.nil?

      @tokens << Token.new(id, @index, 1)
      @index += 1

      send(reader) if reader
    end

    def read_array
      read_integer.times { read }
    end

    # Reads a Marshal-encoded integer and returns its value.
    #
    # The first (signed) byte is either 0, a byte count 1..4 / -4..-1 for a
    # little-endian magnitude that follows, or the value itself shifted by 5.
    def read_integer
      index_base = @index

      header = @dump[@index]
      raise_unexpected_end if header.nil?

      i = header.unpack1("c")
      @index += 1

      value =
        case i
        when 0 then 0
        when 1..4 then read_little_endian(i)
        # negative values are stored as two's complement of the given width
        when -4..-1 then read_little_endian(-i) - (1 << (8 * -i))
        else i > 0 ? i - 5 : i + 5
        end

      @tokens << Token.new(INTEGER, index_base, @index - index_base, value)
      value
    end

    # Reads a big integer: sign, length in 16-bit units, then the
    # little-endian magnitude.
    def read_big_integer
      sign = read_sign
      length = read_integer * 2

      index_base = @index
      value = read_little_endian(length)
      value = -value if sign.id == MINUS_SIGN

      @tokens << Token.new(BIG_INTEGER, index_base, length, value)
    end

    def read_sign
      c = @dump[@index]

      id =
        case c
        when "+" then PLUS_SIGN
        when "-" then MINUS_SIGN
        else UNKNOWN_SIGN
        end

      token = Token.new(id, @index, 1)
      @tokens << token
      @index += 1
      token
    end

    def read_object_with_instance_variables
      read # read the object itself
      read_pairs # read instance variables (name, value)
    end

    def read_string
      read_length_prefixed(STRING)
    end

    def read_symbol
      read_length_prefixed(SYMBOL)
    end

    def read_symbol_link
      read_integer
    end

    def read_hash
      read_pairs
    end

    def read_hash_with_default_value
      read_pairs
      read # read default value - any object
    end

    def read_regexp
      read_string # read Regexp's source
      read_byte # read flags - a single raw byte, not a Marshal integer
    end

    def read_struct
      read # read symbol (class name)
      read_pairs # read members (name, value)
    end

    def read_float
      length = read_integer
      string = @dump[@index, length]
      @tokens << Token.new(FLOAT, @index, length, parse_float(string))
      @index += length
    end

    def read_class
      read_length_prefixed(STRING)
    end

    def read_module
      read_length_prefixed(STRING)
    end

    def read_object_of_subclass_of_core_library_class
      read # read symbol (class name)
      read # read object
    end

    def read_object
      read # read symbol (class name)
      read_pairs # read instance variables (name, value)
    end

    def read_object_extended
      read # read symbol (module name)
      read # read object itself
    end

    def read_object_with_dump
      read # read symbol (class name)
      read_string # read dumped string
    end

    def read_object_with_marshal_dump
      read # read symbol (class name)
      read # read object (what #marshal_dump returned)
    end

    # Reads a count followed by that many (object, object) pairs.
    def read_pairs
      read_integer.times do
        read
        read
      end
    end

    # Reads a length followed by that many characters, recorded as a
    # single token with the given id.
    def read_length_prefixed(id)
      length = read_integer
      @tokens << Token.new(id, @index, length)
      @index += length
    end

    # Reads one raw unsigned byte, recorded as an INTEGER token.
    def read_byte
      byte = @dump[@index]
      raise_unexpected_end if byte.nil?

      value = byte.unpack1("C")
      @tokens << Token.new(INTEGER, @index, 1, value)
      @index += 1
      value
    end

    # Consumes +size+ bytes and returns them as a little-endian unsigned
    # integer. Emits no token.
    def read_little_endian(size)
      bytes = @dump[@index, size].to_s.bytes
      raise_unexpected_end if bytes.size < size

      @index += size
      bytes.reverse.reduce(0) { |acc, byte| (acc << 8) + byte }
    end

    # Converts the textual float payload; Marshal spells the special
    # values as "inf", "-inf" and "nan", which Kernel#Float rejects.
    def parse_float(string)
      case string
      when "inf" then Float::INFINITY
      when "-inf" then -Float::INFINITY
      when "nan" then Float::NAN
      else Float(string)
      end
    end

    def raise_unexpected_end
      raise "Unexpected end of dump (index=#{@index})"
    end
  end
end