marshal-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarshalParser
4
+ module Formatters
5
+ module AST
6
# Formats an AST as a verbose S-expression: every node becomes "(name ...)"
# and every token listed in the node's attributes becomes a nested
# "(attribute-name value)" line.
class SExpression
  # node          - root node of the AST to format
  # source_string - string that the tokens' index/length pairs point into
  # renderer      - object whose #render turns an EntriesBlock into the result
  def initialize(node, source_string, renderer)
    @node = node
    @source_string = source_string
    @renderer = renderer
  end

  # Builds the entry tree for the root node and hands it to the renderer.
  def string
    root_block = Renderers::EntriesBlock.new(node_to_entries(@node))
    @renderer.render(root_block)
  end

  private

  # Returns [line] for a node without printable children, otherwise
  # [opening line, EntriesBlock with the children].
  def node_to_entries(node)
    children = node.child_entities.each_with_object([]) do |entity, collected|
      if entity.is_a?(Lexer::Token) && node.attributes.key?(entity)
        collected << attribute_line(node.attributes[entity], entity)
      elsif entity.is_a?(Parser::Node)
        collected.concat(node_to_entries(entity))
      end
    end

    entries = [Renderers::Line.new("(#{node_to_name(node)}"), *children]
    # The bracket opened above is closed on the very last line of the subtree.
    close_bracket(entries.last)

    raise "Expected 1st entry to be Line" unless entries[0].is_a?(Renderers::Line)

    if node.is_a?(Parser::Annotatable)
      entries[0] = Renderers::LineAnnotated.new(entries[0].string, node.annotation)
    end

    return entries if entries.size <= 1

    [entries[0], Renderers::EntriesBlock.new(entries[1..])]
  end

  # Builds the "(attribute-name value)" line for one attribute token.
  #
  # NOTE(review): when the attribute value is itself a Token, the text is
  # sliced with the attribute's key token (+token+), not with the value token;
  # SExpressionCompact slices with the value token. Confirm both always match.
  def attribute_line(options, token)
    label = options[:name].to_s.tr("_", "-")
    value = options[:value]
    value = @source_string[token.index, token.length].dump if value.is_a?(Lexer::Token)

    Renderers::Line.new("(#{label} #{value})")
  end

  # MarshalParser::Parser::ObjectWithMarshalDumpMethod -> object-with-marshal-dump-method
  def node_to_name(node)
    short_name = node.class.name.to_s.split("::").last
    dashed = short_name.sub(/Node\Z/, "").gsub(/([a-z])([A-Z])/, '\1-\2')
    dashed.downcase
  end

  # Appends ")" to the last Line reachable from +entry+, descending through
  # nested EntriesBlocks.
  def close_bracket(entry)
    entry = entry.entries.last while entry.is_a?(Renderers::EntriesBlock)
    entry.string << ")" if entry.is_a?(Renderers::Line)
  end
end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarshalParser
4
+ module Formatters
5
+ module AST
6
# Formats an AST as a compact S-expression: a node is printed as its name,
# followed by the value of its literal token (if it has one) and by its child
# nodes. Nodes reporting #always_leaf? are printed without brackets.
class SExpressionCompact
  # node          - root node of the AST to format
  # source_string - string that the tokens' index/length pairs point into
  # renderer      - object whose #render turns an EntriesBlock into the result
  def initialize(node, source_string, renderer)
    @node = node
    @source_string = source_string
    @renderer = renderer
  end

  # Builds the entry tree for the root node and hands it to the renderer.
  def string
    root_block = Renderers::EntriesBlock.new(node_to_entries(@node))
    @renderer.render(root_block)
  end

  private

  # Returns [line] when the node fits on one line, otherwise
  # [first line, EntriesBlock with the remaining entries].
  def node_to_entries(node)
    children = node.child_entities.each_with_object([]) do |entity, collected|
      if entity.is_a?(Parser::Node)
        collected.concat(node_to_entries(entity))
      elsif entity == node.literal_token
        collected << literal_line(node)
      end
    end

    entries = [Renderers::Line.new(node_to_name(node)), *children]

    # A node that consists only of its name and its literal collapses into one line.
    if node.literal_token && entries.size == 2 && entries.all?(Renderers::Line)
      entries = [Renderers::Line.new(entries.map(&:string).join(" "))]
    end

    unless node.always_leaf?
      entries[0] = Renderers::Line.new("(#{entries[0].string}")
      # The bracket opened above is closed on the very last line of the subtree.
      close_bracket(entries.last)
    end

    raise "Expected 1st entry to be Line" unless entries[0].is_a?(Renderers::Line)

    if node.is_a?(Parser::Annotatable)
      entries[0] = Renderers::LineAnnotated.new(entries[0].string, node.annotation)
    end

    return entries if entries.size <= 1

    [entries[0], Renderers::EntriesBlock.new(entries[1..])]
  end

  # Builds the Line showing the node's literal value: the dumped source text
  # when the value is a Token, otherwise the value converted with #to_s.
  def literal_line(node)
    value = node.attributes[node.literal_token][:value]
    text =
      if value.is_a?(Lexer::Token)
        @source_string[value.index, value.length].dump
      else
        value.to_s
      end

    Renderers::Line.new(text)
  end

  # MarshalParser::Parser::ObjectWithMarshalDumpMethod -> object-with-marshal-dump-method
  def node_to_name(node)
    short_name = node.class.name.to_s.split("::").last
    dashed = short_name.sub(/Node\Z/, "").gsub(/([a-z])([A-Z])/, '\1-\2')
    dashed.downcase
  end

  # Appends ")" to the last Line reachable from +entry+, descending through
  # nested EntriesBlocks.
  def close_bracket(entry)
    entry = entry.entries.last while entry.is_a?(Renderers::EntriesBlock)
    entry.string << ")" if entry.is_a?(Renderers::Line)
  end
end
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarshalParser
4
+ module Formatters
5
+ module Symbols
6
# Formats a list of symbols as a numbered table, one symbol per line.
class Table
  # symbols - the symbols to list; each is printed next to its position
  def initialize(symbols)
    @symbols = symbols
  end

  # Returns the table as a String: one "<index> - :<symbol>" row per symbol,
  # with the index left-aligned in a 4-character column and rows separated
  # by newlines.
  def string
    rows = @symbols.each_with_index.map do |symbol, position|
      format("%-4d - :%s", position, symbol)
    end

    rows.join("\n")
  end
end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarshalParser
4
+ module Formatters
5
+ module Tokens
6
# Formats tokens as a single line: the source text of every token, separated
# by spaces.
class OneLine
  # tokens        - objects responding to #index and #length
  # source_string - string that the index/length pairs point into
  def initialize(tokens, source_string)
    @tokens = tokens
    @source_string = source_string
  end

  # Returns the tokens' source fragments joined with " ". A fragment that
  # contains a non-printable character is shown in its String#dump form.
  def string
    fragments = []

    @tokens.each do |token|
      fragment = @source_string[token.index, token.length]
      fragment = fragment.dump if fragment =~ /[^[:print:]]/
      fragments << fragment
    end

    fragments.join(" ")
  end
end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarshalParser
4
+ module Formatters
5
+ module Tokens
6
# Formats tokens as a table: one line per token with the token's dumped
# source text, a human-readable description of its kind and, when the token
# carries one, its decoded value.
class WithDescription
  # tokens        - objects responding to #id, #index, #length and #value
  # source_string - string that the index/length pairs point into
  def initialize(tokens, source_string)
    @tokens = tokens
    @source_string = source_string
  end

  # Returns one "<dumped text> - <description>[ (<value>)]" line per token,
  # joined with newlines. The dumped text is left-aligned in a 10-character
  # column.
  def string
    @tokens.map do |token|
      string = @source_string[token.index, token.length].dump
      description = self.class.token_description(token.id)
      # A falsy token value (nil/false) produces no "(value)" suffix.
      value = token.value ? " (#{token.value})" : ""

      "%-10s - %s%s" % [string, description, value]
    end.join("\n")
  end

  # Maps a token id (one of the Lexer constants) to its description.
  # Returns nil for an id that matches none of them.
  def self.token_description(token)
    case token
    when Lexer::VERSION then "Version"
    when Lexer::ARRAY_PREFIX then "Array beginning"
    when Lexer::OBJECT_WITH_IVARS_PREFIX then "Special object with instance variables"
    when Lexer::OBJECT_WITH_DUMP_PREFIX then "Object with #_dump and .load"
    when Lexer::OBJECT_WITH_MARSHAL_DUMP_PREFIX then "Object with #marshal_dump and #marshal_load"
    when Lexer::STRING_PREFIX then "String beginning"
    when Lexer::HASH_PREFIX then "Hash beginning"
    when Lexer::HASH_WITH_DEFAULT_VALUE_PREFIX then "Hash beginning (with default value)"
    when Lexer::REGEXP_PREFIX then "Regexp beginning"
    when Lexer::STRUCT_PREFIX then "Struct beginning"
    when Lexer::TRUE then "true"
    when Lexer::FALSE then "false"
    when Lexer::NIL then "nil"
    when Lexer::FLOAT_PREFIX then "Float beginning"
    when Lexer::INTEGER_PREFIX then "Integer beginning"
    when Lexer::BIG_INTEGER_PREFIX then "Big Integer beginning"
    when Lexer::SYMBOL_PREFIX then "Symbol beginning"
    when Lexer::SYMBOL_LINK_PREFIX then "Link to Symbol"
    when Lexer::CLASS_PREFIX then "Class beginning"
    when Lexer::MODULE_PREFIX then "Module beginning"
    when Lexer::OBJECT_PREFIX then "Object beginning"
    when Lexer::OBJECT_LINK_PREFIX then "Link to object"
    when Lexer::OBJECT_EXTENDED_PREFIX then "Object extended with a module"
    when Lexer::SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX then "Instance of a Core Library class subclass beginning"
    when Lexer::FLOAT then "Float string representation"
    when Lexer::INTEGER then "Integer encoded"
    when Lexer::BIG_INTEGER then "Big Integer encoded"
    when Lexer::STRING then "String characters"
    when Lexer::SYMBOL then "Symbol characters"
    when Lexer::PLUS_SIGN then "Sign '+'"
    when Lexer::MINUS_SIGN then "Sign '-'"
    when Lexer::UNKNOWN_SIGN then "Unknown sign (internal error)"
    end
  end
end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,331 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarshalParser
4
# Splits a Marshal dump into a flat list of tokens.
#
# Every token records its id (one of the constants below), its offset and
# length in the source string and, for some token kinds, a decoded value.
#
#   lexer = Lexer.new(Marshal.dump([1, 2]))
#   lexer.run
#   lexer.tokens # => [#<struct Token id=0, index=0, length=2, value="4.8">, ...]
class Lexer
  # Token ids: consecutive Integers starting at 0. The multiple assignment
  # takes one value per constant; surplus array elements are ignored.
  VERSION,
  ARRAY_PREFIX,
  OBJECT_WITH_IVARS_PREFIX,
  OBJECT_WITH_DUMP_PREFIX,
  OBJECT_WITH_MARSHAL_DUMP_PREFIX,
  STRING_PREFIX,
  HASH_PREFIX,
  HASH_WITH_DEFAULT_VALUE_PREFIX,
  REGEXP_PREFIX,
  STRUCT_PREFIX,
  TRUE,
  FALSE,
  NIL,
  FLOAT_PREFIX,
  INTEGER_PREFIX,
  BIG_INTEGER_PREFIX,
  SYMBOL_PREFIX,
  SYMBOL_LINK_PREFIX,
  CLASS_PREFIX,
  MODULE_PREFIX,
  OBJECT_PREFIX,
  OBJECT_LINK_PREFIX,
  OBJECT_EXTENDED_PREFIX,
  SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX,
  FLOAT,
  INTEGER,
  BIG_INTEGER,
  STRING,
  SYMBOL,
  PLUS_SIGN,
  MINUS_SIGN,
  UNKNOWN_SIGN = (0..100).to_a

  Token = Struct.new(:id, :index, :length, :value)

  # Type character => [id of the prefix token, private reader for the payload].
  # A nil reader means the type character is the whole value.
  DISPATCH = {
    "[" => [ARRAY_PREFIX, :read_array],
    "I" => [OBJECT_WITH_IVARS_PREFIX, :read_object_with_instance_variables],
    '"' => [STRING_PREFIX, :read_string],
    "{" => [HASH_PREFIX, :read_hash],
    "}" => [HASH_WITH_DEFAULT_VALUE_PREFIX, :read_hash_with_default_value],
    "/" => [REGEXP_PREFIX, :read_regexp],
    "S" => [STRUCT_PREFIX, :read_struct],
    "T" => [TRUE, nil],
    "F" => [FALSE, nil],
    "0" => [NIL, nil],
    ":" => [SYMBOL_PREFIX, :read_symbol],
    ";" => [SYMBOL_LINK_PREFIX, :read_symbol_link],
    "f" => [FLOAT_PREFIX, :read_float],
    "i" => [INTEGER_PREFIX, :read_integer],
    "l" => [BIG_INTEGER_PREFIX, :read_big_integer],
    "c" => [CLASS_PREFIX, :read_class],
    "m" => [MODULE_PREFIX, :read_module],
    "C" => [SUBCLASS_OF_CORE_LIBRARY_CLASS_PREFIX, :read_object_of_subclass_of_core_library_class],
    "o" => [OBJECT_PREFIX, :read_object],
    "@" => [OBJECT_LINK_PREFIX, :read_integer],
    "e" => [OBJECT_EXTENDED_PREFIX, :read_object_extended],
    "u" => [OBJECT_WITH_DUMP_PREFIX, :read_object_with_dump],
    "U" => [OBJECT_WITH_MARSHAL_DUMP_PREFIX, :read_object_with_marshal_dump]
  }.freeze
  private_constant :DISPATCH

  # Tokens produced by the last #run (empty before the first run).
  attr_reader :tokens

  # source_string - the Marshal dump to tokenize
  def initialize(source_string)
    @dump = source_string
    @tokens = []
  end

  # Tokenizes the version header and the single top-level object that follows.
  # The result is available through #tokens.
  #
  # Raises RuntimeError on an unknown type character and when the dump ends
  # before the object is complete.
  def run
    @index = 0
    @tokens = []

    read_version
    read
  end

  # The dump this lexer was created with.
  def source_string
    @dump
  end

  private

  # Reads the two version bytes; the token value is "<major>.<minor>".
  def read_version
    version = @dump[@index, 2]
    version_unpacked = version.unpack("CC").join(".")
    @tokens << Token.new(VERSION, @index, 2, version_unpacked)
    @index += 2
  end

  # Reads one object: emits the token for its type character, then lets the
  # matching reader consume the payload.
  def read
    c = @dump[@index]
    raise "Unexpected end of input (index=#{@index})" if c.nil?

    id, reader = DISPATCH[c]
    raise "Unexpected character #{c.dump} (index=#{@index})" if id.nil?

    @tokens << Token.new(id, @index, 1)
    @index += 1

    reader ? send(reader) : @tokens
  end

  # Consumes +count+ characters and returns them. Raises RuntimeError when the
  # dump holds fewer than +count+ characters from the current position.
  def next_bytes(count)
    chunk = @dump[@index, count]
    raise "Unexpected end of input (index=#{@index})" if chunk.nil? || chunk.length < count

    @index += count
    chunk
  end

  # Reads +size+ bytes as an unsigned little-endian number (0 for size 0).
  def read_unsigned(size)
    next_bytes(size).bytes.reverse.reduce(0) { |acc, byte| (acc << 8) + byte }
  end

  # Reads +count+ consecutive objects.
  def read_array
    count = read_integer
    (1..count).map { read }
  end

  # Reads an Integer in Marshal's variable-length encoding, emits an INTEGER
  # token covering all of its bytes and returns the decoded value.
  #
  # The first byte, taken as signed, selects the form:
  #   0         the value 0
  #   1..4      that many bytes follow, holding a non-negative value
  #   -4..-1    that many bytes follow, holding a negative value
  #   otherwise the byte itself encodes a small value, shifted by 5
  def read_integer
    index_base = @index
    header = next_bytes(1).unpack1("c")

    value =
      case header
      when 0 then 0
      when 1..4 then read_unsigned(header)
      when -4..-1 then read_unsigned(-header) - (1 << (8 * -header))
      else header.positive? ? header - 5 : header + 5
      end

    @tokens << Token.new(INTEGER, index_base, @index - index_base, value)
    value
  end

  # Reads the sign, the length and the little-endian magnitude of a big
  # Integer. The stored length is doubled to get the number of bytes.
  def read_big_integer
    sign = read_sign
    length = read_integer * 2
    index_base = @index

    value = read_unsigned(length)
    value = -value if sign.id == MINUS_SIGN
    @tokens << Token.new(BIG_INTEGER, index_base, length, value)

    @index
  end

  # Reads the sign character of a big Integer; emits and returns its token.
  def read_sign
    id =
      case @dump[@index]
      when "+" then PLUS_SIGN
      when "-" then MINUS_SIGN
      else UNKNOWN_SIGN
      end

    token = Token.new(id, @index, 1)
    @tokens << token
    @index += 1
    token
  end

  # Reads a count followed by that many pairs of objects (names and values,
  # or keys and values).
  def read_pairs
    read_integer.times do
      read # name or key
      read # value
    end
  end

  # Reads a length followed by that many characters, emitted as one token
  # with the given id.
  def read_counted(token_id)
    length = read_integer
    @tokens << Token.new(token_id, @index, length)
    @index += length
  end

  def read_object_with_instance_variables
    read # read the object itself
    read_pairs # read instance variables
  end

  def read_string
    read_counted(STRING)
  end

  def read_symbol
    read_counted(SYMBOL)
  end

  def read_symbol_link
    read_integer
  end

  def read_hash
    read_pairs
  end

  def read_hash_with_default_value
    read_pairs
    read # read default value - any object
  end

  # Reads a Regexp's source and its options. The options are stored as one
  # raw byte, not in the variable-length Integer encoding, so they are read
  # directly; they are still emitted as an INTEGER token.
  def read_regexp
    read_string # read Regexp's source

    index_base = @index
    flags = next_bytes(1).unpack1("C")
    @tokens << Token.new(INTEGER, index_base, 1, flags)
    flags
  end

  def read_struct
    read # read symbol (class name)
    read_pairs # read members (symbol and value)
  end

  # Reads a Float's textual representation; the token value is the Float.
  def read_float
    length = read_integer
    index_base = @index
    text = next_bytes(length)
    @tokens << Token.new(FLOAT, index_base, length, parse_float(text))
    @index
  end

  # Converts a dumped Float representation. Kernel#Float rejects the special
  # spellings "inf", "-inf" and "nan", so they are mapped explicitly.
  def parse_float(text)
    case text
    when "inf" then Float::INFINITY
    when "-inf" then -Float::INFINITY
    when "nan" then Float::NAN
    else Float(text)
    end
  end

  def read_class
    read_counted(STRING)
  end

  def read_module
    read_counted(STRING)
  end

  def read_object_of_subclass_of_core_library_class
    read # read symbol (class name)
    read # read object
  end

  def read_object
    read # read symbol (class name)
    read_pairs # read instance variables
  end

  def read_object_extended
    read # read symbol (module name)
    read # read object itself
  end

  def read_object_with_dump
    read # read symbol (class name)
    read_string # read dumped string
  end

  def read_object_with_marshal_dump
    read # read symbol (class name)
    read # read object (what #marshal_dump returned)
  end
end
331
+ end