yarp 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
data/lib/yarp/pack.rb
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YARP
|
4
|
+
module Pack
|
5
|
+
# Define one constant per name below, each holding the symbol of the same
# name (e.g. INTEGER == :INTEGER). The names are grouped by the directive
# attribute they are used for.
[
  # Directive types
  %i[
    SPACE COMMENT INTEGER UTF8 BER FLOAT
    STRING_SPACE_PADDED STRING_NULL_PADDED STRING_NULL_TERMINATED
    STRING_MSB STRING_LSB STRING_HEX_HIGH STRING_HEX_LOW
    STRING_UU STRING_MIME STRING_BASE64 STRING_FIXED STRING_POINTER
    MOVE BACK NULL
  ],
  # Signedness
  %i[UNSIGNED SIGNED SIGNED_NA],
  # Endianness
  %i[AGNOSTIC_ENDIAN LITTLE_ENDIAN BIG_ENDIAN NATIVE_ENDIAN ENDIAN_NA],
  # Sizes
  %i[
    SIZE_SHORT SIZE_INT SIZE_LONG SIZE_LONG_LONG
    SIZE_8 SIZE_16 SIZE_32 SIZE_64 SIZE_P SIZE_NA
  ],
  # Length types
  %i[LENGTH_FIXED LENGTH_MAX LENGTH_RELATIVE LENGTH_NA]
].each do |group|
  group.each { |name| const_set(name, name) }
end
|
56
|
+
|
57
|
+
# A single directive of a pack/unpack template together with the attributes
# needed to describe it in plain English.
class Directive
  attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length

  # All arguments are stored as given. Within this class only +type+,
  # +signed+, +endian+, +size+, +length_type+ and +length+ are consulted (by
  # #describe); +version+, +variant+ and +source+ are simply exposed through
  # their readers.
  def initialize(version, variant, source, type, signed, endian, size, length_type, length)
    @version = version
    @variant = variant
    @source = source
    @type = type
    @signed = signed
    @endian = endian
    @size = size
    @length_type = length_type
    @length = length
  end

  # Human readable names for each endianness constant.
  ENDIAN_DESCRIPTIONS = {
    AGNOSTIC_ENDIAN: 'agnostic',
    LITTLE_ENDIAN: 'little-endian (VAX)',
    BIG_ENDIAN: 'big-endian (network)',
    NATIVE_ENDIAN: 'native-endian',
    ENDIAN_NA: 'n/a'
  }.freeze

  # Human readable names for each signedness constant.
  SIGNED_DESCRIPTIONS = {
    UNSIGNED: 'unsigned',
    SIGNED: 'signed',
    SIGNED_NA: 'n/a'
  }.freeze

  # Human readable names for each size constant.
  SIZE_DESCRIPTIONS = {
    SIZE_SHORT: 'short',
    SIZE_INT: 'int-width',
    SIZE_LONG: 'long',
    SIZE_LONG_LONG: 'long long',
    SIZE_8: '8-bit',
    SIZE_16: '16-bit',
    SIZE_32: '32-bit',
    SIZE_64: '64-bit',
    SIZE_P: 'pointer-width'
  }.freeze

  # Returns an English description of this directive.
  #
  # Raises a RuntimeError naming the offending value when +type+ is not one
  # of the known directive type constants.
  def describe
    case type
    when SPACE
      'whitespace'
    when COMMENT
      'comment'
    when INTEGER
      describe_integer
    when UTF8
      'UTF-8 character'
    when BER
      'BER-compressed integer'
    when FLOAT
      "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
    when STRING_SPACE_PADDED
      'arbitrary binary string (space padded)'
    when STRING_NULL_PADDED
      'arbitrary binary string (null padded, count is width)'
    when STRING_NULL_TERMINATED
      'arbitrary binary string (null padded, count is width), except that null is added with *'
    when STRING_MSB
      'bit string (MSB first)'
    when STRING_LSB
      'bit string (LSB first)'
    when STRING_HEX_HIGH
      'hex string (high nibble first)'
    when STRING_HEX_LOW
      'hex string (low nibble first)'
    when STRING_UU
      'UU-encoded string'
    when STRING_MIME
      'quoted printable, MIME encoding'
    when STRING_BASE64
      'base64 encoded string'
    when STRING_FIXED
      'pointer to a structure (fixed-length string)'
    when STRING_POINTER
      'pointer to a null-terminated string'
    when MOVE
      'move to absolute position'
    when BACK
      'back up a byte'
    when NULL
      'null byte'
    else
      raise "Unknown pack directive type: #{type.inspect}"
    end
  end

  private

  # Describes an INTEGER directive: signedness, size, endianness (omitted for
  # 8-bit integers, where byte order is meaningless) and repeat count.
  def describe_integer
    base =
      if size == SIZE_8
        "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
      else
        "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
      end

    case length_type
    when LENGTH_FIXED
      length > 1 ? "#{base}, x#{length}" : base
    when LENGTH_MAX
      "#{base}, as many as possible"
    else
      # Any other length type carries no repeat information; fall back to the
      # bare description rather than returning nil.
      base
    end
  end
end
|
161
|
+
|
162
|
+
# A list of directives together with an encoding, able to render itself as a
# multi-line, human readable summary.
class Format
  attr_reader :directives, :encoding

  # Stores the directive list and the encoding exactly as given.
  def initialize(directives, encoding)
    @directives, @encoding = directives, encoding
  end

  # Builds the summary: a "Directives:" header, one line per directive with
  # its source left-aligned in a fixed-width column followed by the
  # directive's own description, then an "Encoding:" header and the encoding.
  def describe
    # The column is as wide as the longest inspected source.
    width = directives.map { |directive| directive.source.inspect.length }.max

    lines = ['Directives:']
    directives.each do |directive|
      # Whitespace directives are shown quoted so they stay visible.
      label = directive.type == SPACE ? directive.source.inspect : directive.source
      lines << " #{label.ljust(width)} #{directive.describe}"
    end
    lines << 'Encoding:' << " #{encoding}"

    lines.join("\n")
  end
end
|
184
|
+
end
|
185
|
+
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ripper"
|
4
|
+
|
5
|
+
module YARP
|
6
|
+
# This class is meant to provide a compatibility layer between YARP and
|
7
|
+
# Ripper. It functions by parsing the entire tree first and then walking it
|
8
|
+
# and executing each of the Ripper callbacks as it goes.
|
9
|
+
#
|
10
|
+
# This class is going to necessarily be slower than the native Ripper API. It
|
11
|
+
# is meant as a stopgap until developers migrate to using YARP. It is also
|
12
|
+
# meant as a test harness for the YARP parser.
|
13
|
+
class RipperCompat
|
14
|
+
# Counterpart of ::Ripper::SexpBuilder: every event handler returns an array
# describing the event instead of nil.
class SexpBuilder < RipperCompat
  private

  # Parser events yield [event, *children].
  Ripper::PARSER_EVENTS.each do |event|
    handler = :"on_#{event}"
    define_method(handler) { |*args| [event, *args] }
  end

  # Scanner events yield [:@event, token value, [lineno, column]], using the
  # position most recently recorded on this instance.
  Ripper::SCANNER_EVENTS.each do |event|
    handler = :"on_#{event}"
    define_method(handler) { |value| [:"@#{event}", value, [lineno, column]] }
  end
end
|
31
|
+
|
32
|
+
# Counterpart of ::Ripper::SexpBuilderPP: behaves like SexpBuilder, except
# that list-building events (*_new / *_add) produce plain flat arrays.
class SexpBuilderPP < SexpBuilder
  private

  # Handler for zero-argument *_new events: start an empty list.
  def _dispatch_event_new
    []
  end

  # Handler for *_add events: append the item and hand back the same list.
  def _dispatch_event_push(list, item)
    list.tap { |accumulator| accumulator << item }
  end

  Ripper::PARSER_EVENT_TABLE.each do |event, arity|
    event_name = event.to_s

    if event_name.end_with?("_new")
      # Only *_new events that take no arguments start a list.
      alias :"on_#{event}" :_dispatch_event_new if arity == 0
    elsif event_name.end_with?("_add")
      alias :"on_#{event}" :_dispatch_event_push
    end
  end
end
|
56
|
+
|
57
|
+
# The source given to the constructor, and the line number / column most
# recently recorded by the private `bounds` helper (nil until it first runs).
attr_reader :source, :lineno, :column
|
58
|
+
|
59
|
+
# Remembers the source to parse. The parse result and the current position
# start out as nil and are filled in later.
def initialize(source)
  @source = source
  @result = @lineno = @column = nil
end
|
65
|
+
|
66
|
+
############################################################################
|
67
|
+
# Public interface
|
68
|
+
############################################################################
|
69
|
+
|
70
|
+
# True when the parse result reports at least one error.
def error?
  parse_errors = result.errors
  parse_errors.any?
end
|
73
|
+
|
74
|
+
# Walks the parsed tree with this object as the visitor and returns whatever
# the root node's `accept` returns. Returns nil without walking anything when
# the parse result contains errors.
def parse
  return if error?

  result.value.accept(self)
end
|
77
|
+
|
78
|
+
############################################################################
|
79
|
+
# Visitor methods
|
80
|
+
############################################################################
|
81
|
+
|
82
|
+
# Dispatches to the node's `accept` with this object as the visitor. A nil
# node is tolerated and yields nil.
def visit(node)
  return nil if node.nil?

  node.accept(self)
end
|
85
|
+
|
86
|
+
# Visits a call node. Only one shape is supported: a call with a receiver,
# no opening parenthesis and exactly one argument, which is reported through
# on_binary as (left, operator symbol, right).
#
# Raises NotImplementedError for every other kind of call, including calls
# with no argument list or no receiver (which previously crashed with
# NoMethodError on nil).
def visit_call_node(node)
  receiver = node.receiver
  arguments = node.arguments&.arguments

  if node.opening_loc || receiver.nil? || arguments.nil? || arguments.length != 1
    raise NotImplementedError
  end

  bounds(receiver.location)
  left = visit(receiver)

  argument = arguments.first
  bounds(argument.location)
  right = visit(argument)

  # NOTE(review): offsets are treated as byte offsets into the source, so the
  # operator is cut out with byteslice rather than character indexing —
  # confirm against the Location implementation.
  operator_loc = node.message_loc
  operator = source.byteslice(operator_loc.start_offset...operator_loc.end_offset)

  on_binary(left, operator.to_sym, right)
end
|
99
|
+
|
100
|
+
# Visits an integer literal: records its position, then reports the literal's
# source text through on_int and returns that handler's result.
def visit_integer_node(node)
  location = node.location
  bounds(location)

  # NOTE(review): offsets are treated as byte offsets into the source, so the
  # literal is cut out with byteslice; character indexing returns the wrong
  # text once a multibyte character precedes the literal — confirm against
  # the Location implementation.
  on_int(source.byteslice(location.start_offset...location.end_offset))
end
|
104
|
+
|
105
|
+
# Visits a statements node: starts a statement list with on_stmts_new, then
# threads it through on_stmts_add once per visited child, returning the
# final list.
def visit_statements_node(node)
  bounds(node.location)

  statements = on_stmts_new
  node.body.each do |statement|
    statements = on_stmts_add(statements, visit(statement))
  end
  statements
end
|
111
|
+
|
112
|
+
# Visits a token. MINUS and PLUS tokens are reported through on_op with the
# token's value; any other token type raises NotImplementedError.
def visit_token(node)
  bounds(node.location)

  token_type = node.type
  unless %i[MINUS PLUS].include?(token_type)
    raise NotImplementedError, "Unknown token: #{token_type}"
  end

  on_op(node.value)
end
|
124
|
+
|
125
|
+
# Visits the program node: records its position, visits its statements and
# reports the outcome through on_program.
def visit_program_node(node)
  bounds(node.location)

  statements = visit(node.statements)
  on_program(statements)
end
|
129
|
+
|
130
|
+
############################################################################
|
131
|
+
# Entrypoints for subclasses
|
132
|
+
############################################################################
|
133
|
+
|
134
|
+
# Convenience entry point: parses the source with a fresh SexpBuilder and
# returns the outcome of its #parse.
def self.sexp_raw(source)
  builder = SexpBuilder.new(source)
  builder.parse
end
|
138
|
+
|
139
|
+
# Convenience entry point: parses the source with a fresh SexpBuilderPP and
# returns the outcome of its #parse.
def self.sexp(source)
  builder = SexpBuilderPP.new(source)
  builder.parse
end
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
# This method is responsible for updating lineno and column information
# to reflect the start of the given location: lineno is 1-based, column is
# the 0-based distance from the start of that line.
#
# Only the text strictly before the start offset is examined, so a newline
# sitting exactly at the offset does not bump the line number, and the
# column is measured from the character after the previous newline.
#
# NOTE(review): offsets are treated as byte offsets into the source (hence
# byteslice and a binary copy) — confirm against the Location implementation.
#
# This method could be drastically improved with some caching on the start
# of every line, but for now it's good enough.
def bounds(location)
  start_offset = location.start_offset

  # Binary copy so that count/rindex operate on bytes, matching the offset.
  preceding = source.byteslice(0, start_offset).b
  last_newline = preceding.rindex("\n")

  @lineno = preceding.count("\n") + 1
  @column = last_newline ? start_offset - last_newline - 1 : start_offset
end
|
157
|
+
|
158
|
+
# Parses the source with YARP.parse the first time it is needed and reuses
# the stored result afterwards.
def result
  return @result if @result

  @result = YARP.parse(source)
end
|
161
|
+
|
162
|
+
# Default no-op handlers, one per argument count used by Ripper events.
# Each accepts (and ignores) that many arguments and returns nil.
def _dispatch0
end

def _dispatch1(_)
end

def _dispatch2(_, _)
end

def _dispatch3(_, _, _)
end

def _dispatch4(_, _, _, _)
end

def _dispatch5(_, _, _, _, _)
end

def _dispatch7(_, _, _, _, _, _, _)
end

# Give every scanner and parser event an on_<event> handler by aliasing the
# no-op handler whose argument count matches the event's arity.
event_arities = Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)
event_arities.each_pair do |event, arity|
  alias :"on_#{event}" :"_dispatch#{arity}"
end
|
173
|
+
end
|
174
|
+
end
|