yarp 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
data/lib/yarp/pack.rb
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YARP
|
4
|
+
module Pack
|
5
|
+
# Define one constant per name below on the enclosing module. Each constant's
# value is the symbol of its own name (for example, SPACE is :SPACE), so the
# constants can be compared directly against symbols.
%i[
  SPACE
  COMMENT
  INTEGER
  UTF8
  BER
  FLOAT
  STRING_SPACE_PADDED
  STRING_NULL_PADDED
  STRING_NULL_TERMINATED
  STRING_MSB
  STRING_LSB
  STRING_HEX_HIGH
  STRING_HEX_LOW
  STRING_UU
  STRING_MIME
  STRING_BASE64
  STRING_FIXED
  STRING_POINTER
  MOVE
  BACK
  NULL

  UNSIGNED
  SIGNED
  SIGNED_NA

  AGNOSTIC_ENDIAN
  LITTLE_ENDIAN
  BIG_ENDIAN
  NATIVE_ENDIAN
  ENDIAN_NA

  SIZE_SHORT
  SIZE_INT
  SIZE_LONG
  SIZE_LONG_LONG
  SIZE_8
  SIZE_16
  SIZE_32
  SIZE_64
  SIZE_P
  SIZE_NA

  LENGTH_FIXED
  LENGTH_MAX
  LENGTH_RELATIVE
  LENGTH_NA
].each { |name| const_set(name, name) }
|
56
|
+
|
57
|
+
# A single directive of a pack template, together with the attributes needed
# to render a human-readable description of it.
class Directive
  attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length

  # Stores every argument verbatim in the reader of the same name.
  #
  # +type+, +signed+, +endian+, +size+ and +length_type+ are compared against
  # the symbol-valued constants of the enclosing module (SPACE, UNSIGNED,
  # LITTLE_ENDIAN, SIZE_8, LENGTH_FIXED, ...). +length+ is compared numerically
  # when +length_type+ is LENGTH_FIXED.
  def initialize(version, variant, source, type, signed, endian, size, length_type, length)
    @version = version
    @variant = variant
    @source = source
    @type = type
    @signed = signed
    @endian = endian
    @size = size
    @length_type = length_type
    @length = length
  end

  # Human-readable names for each endianness value.
  ENDIAN_DESCRIPTIONS = {
    AGNOSTIC_ENDIAN: 'agnostic',
    LITTLE_ENDIAN: 'little-endian (VAX)',
    BIG_ENDIAN: 'big-endian (network)',
    NATIVE_ENDIAN: 'native-endian',
    ENDIAN_NA: 'n/a'
  }.freeze

  # Human-readable names for each signedness value.
  SIGNED_DESCRIPTIONS = {
    UNSIGNED: 'unsigned',
    SIGNED: 'signed',
    SIGNED_NA: 'n/a'
  }.freeze

  # Human-readable names for each size value.
  SIZE_DESCRIPTIONS = {
    SIZE_SHORT: 'short',
    SIZE_INT: 'int-width',
    SIZE_LONG: 'long',
    SIZE_LONG_LONG: 'long long',
    SIZE_8: '8-bit',
    SIZE_16: '16-bit',
    SIZE_32: '32-bit',
    SIZE_64: '64-bit',
    SIZE_P: 'pointer-width'
  }.freeze

  # Returns a String describing this directive in plain English.
  #
  # Raises RuntimeError when +type+ is not one of the known directive types.
  def describe
    case type
    when SPACE
      'whitespace'
    when COMMENT
      'comment'
    when INTEGER
      describe_integer
    when UTF8
      'UTF-8 character'
    when BER
      'BER-compressed integer'
    when FLOAT
      "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
    when STRING_SPACE_PADDED
      'arbitrary binary string (space padded)'
    when STRING_NULL_PADDED
      'arbitrary binary string (null padded, count is width)'
    when STRING_NULL_TERMINATED
      'arbitrary binary string (null padded, count is width), except that null is added with *'
    when STRING_MSB
      'bit string (MSB first)'
    when STRING_LSB
      'bit string (LSB first)'
    when STRING_HEX_HIGH
      'hex string (high nibble first)'
    when STRING_HEX_LOW
      'hex string (low nibble first)'
    when STRING_UU
      'UU-encoded string'
    when STRING_MIME
      'quoted printable, MIME encoding'
    when STRING_BASE64
      'base64 encoded string'
    when STRING_FIXED
      'pointer to a structure (fixed-length string)'
    when STRING_POINTER
      'pointer to a null-terminated string'
    when MOVE
      'move to absolute position'
    when BACK
      'back up a byte'
    when NULL
      'null byte'
    else
      # Same exception class as a bare `raise`, but with a message that names
      # the offending value instead of "unhandled exception".
      raise "Unknown pack directive type: #{type.inspect}"
    end
  end

  private

  # Builds the description of an INTEGER directive: signedness, size and
  # endianness, followed by a repeat-count suffix when one applies.
  def describe_integer
    parts = [SIGNED_DESCRIPTIONS[signed], SIZE_DESCRIPTIONS[size]]
    # Endianness is left out of the description for 8-bit integers.
    parts << ENDIAN_DESCRIPTIONS[endian] unless size == SIZE_8
    base = "#{parts.join(' ')} integer"

    case length_type
    when LENGTH_FIXED
      length > 1 ? "#{base}, x#{length}" : base
    when LENGTH_MAX
      "#{base}, as many as possible"
    else
      # Any other length type has no suffix. Return the base description
      # rather than nil so callers always receive a String.
      base
    end
  end
end
|
161
|
+
|
162
|
+
# A pack template as a whole: its list of directives plus the encoding
# associated with it.
class Format
  attr_reader :directives, :encoding

  # directives - objects responding to #source, #type and #describe.
  # encoding   - any object; it is interpolated into the description.
  def initialize(directives, encoding)
    @directives = directives
    @encoding = encoding
  end

  # Returns a multi-line String listing every directive next to its
  # description, followed by the encoding.
  def describe
    # Column width is based on the inspected form of each source, which is
    # the widest form any row can take.
    width = directives.map { |directive| directive.source.inspect.length }.max

    report = ['Directives:']
    directives.each do |directive|
      # Whitespace directives are shown inspected so they remain visible.
      label = directive.type == SPACE ? directive.source.inspect : directive.source
      report << " #{label.ljust(width)} #{directive.describe}"
    end
    report << 'Encoding:'
    report << " #{encoding}"

    report.join("\n")
  end
end
|
184
|
+
end
|
185
|
+
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ripper"
|
4
|
+
|
5
|
+
module YARP
|
6
|
+
# This class is meant to provide a compatibility layer between YARP and
|
7
|
+
# Ripper. It functions by parsing the entire tree first and then walking it
|
8
|
+
# and executing each of the Ripper callbacks as it goes.
|
9
|
+
#
|
10
|
+
# This class is going to necessarily be slower than the native Ripper API. It
|
11
|
+
# is meant as a stopgap until developers migrate to using YARP. It is also
|
12
|
+
# meant as a test harness for the YARP parser.
|
13
|
+
class RipperCompat
  # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
  # returns the arrays of [type, *children].
  class SexpBuilder < RipperCompat
    private

    # Every parser event returns [event, *args].
    Ripper::PARSER_EVENTS.each do |event|
      define_method(:"on_#{event}") do |*args|
        [event, *args]
      end
    end

    # Every scanner event returns [:@event, value, [lineno, column]], using the
    # position most recently recorded by #bounds.
    Ripper::SCANNER_EVENTS.each do |event|
      define_method(:"on_#{event}") do |value|
        [:"@#{event}", value, [lineno, column]]
      end
    end
  end

  # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
  # returns the same values as ::Ripper::SexpBuilder except with a couple of
  # niceties that flatten linked lists into arrays.
  class SexpBuilderPP < SexpBuilder
    private

    # Handler for zero-arity *_new events: starts an empty list.
    def _dispatch_event_new
      []
    end

    # Handler for *_add events: appends the item and returns the same list.
    def _dispatch_event_push(list, item)
      list << item
      list
    end

    Ripper::PARSER_EVENT_TABLE.each do |event, arity|
      case event
      when /_new\z/
        alias :"on_#{event}" :_dispatch_event_new if arity == 0
      when /_add\z/
        alias :"on_#{event}" :_dispatch_event_push
      end
    end
  end

  attr_reader :source, :lineno, :column

  # source - the String of Ruby source code to parse. Parsing is deferred
  #          until the result is first needed.
  def initialize(source)
    @source = source
    @result = nil
    @lineno = nil
    @column = nil
  end

  ############################################################################
  # Public interface
  ############################################################################

  # Returns true when parsing the source produced at least one error.
  def error?
    result.errors.any?
  end

  # Walks the parsed tree, firing the on_* callbacks, and returns the value
  # produced for the root node. Returns nil when the source has errors.
  def parse
    result.value.accept(self) unless error?
  end

  ############################################################################
  # Visitor methods
  ############################################################################

  # Dispatches to the node's own visitor method. Returns nil for a nil node.
  def visit(node)
    node&.accept(self)
  end

  # Only calls with a receiver, no opening parenthesis and exactly one argument
  # are supported; they are reported through on_binary. Every other call shape
  # raises NotImplementedError.
  def visit_call_node(node)
    arguments = node.arguments&.arguments

    # Calls without a receiver or without arguments must reach the
    # NotImplementedError below instead of failing with NoMethodError on nil.
    unless node.receiver && !node.opening_loc && arguments&.length == 1
      raise NotImplementedError
    end

    bounds(node.receiver.location)
    left = visit(node.receiver)

    bounds(arguments.first.location)
    right = visit(arguments.first)

    operator = source[node.message_loc.start_offset...node.message_loc.end_offset].to_sym
    on_binary(left, operator, right)
  end

  # Reports the integer literal's source text through on_int.
  def visit_integer_node(node)
    bounds(node.location)
    on_int(source[node.location.start_offset...node.location.end_offset])
  end

  # Folds the visited statements into a list with on_stmts_new and
  # on_stmts_add.
  def visit_statements_node(node)
    bounds(node.location)
    node.body.inject(on_stmts_new) do |stmts, stmt|
      on_stmts_add(stmts, visit(stmt))
    end
  end

  # Reports plus and minus tokens through on_op. Any other token type raises
  # NotImplementedError.
  def visit_token(node)
    bounds(node.location)

    case node.type
    when :MINUS, :PLUS
      on_op(node.value)
    else
      raise NotImplementedError, "Unknown token: #{node.type}"
    end
  end

  # Reports the program through on_program, passing the visited statements.
  def visit_program_node(node)
    bounds(node.location)
    on_program(visit(node.statements))
  end

  ############################################################################
  # Entrypoints for subclasses
  ############################################################################

  # This is a convenience method that runs the SexpBuilder subclass parser.
  def self.sexp_raw(source)
    SexpBuilder.new(source).parse
  end

  # This is a convenience method that runs the SexpBuilderPP subclass parser.
  def self.sexp(source)
    SexpBuilderPP.new(source).parse
  end

  private

  # This method is responsible for updating lineno and column information
  # to reflect the current node. lineno is 1-based and column is 0-based,
  # matching ::Ripper.
  #
  # This method could be drastically improved with some caching on the start
  # of every line, but for now it's good enough.
  #
  # NOTE(review): offsets are applied as character indices here, as in the
  # other visitor methods. If locations carry byte offsets this is only right
  # for single-byte sources; confirm against the parser.
  def bounds(location)
    start_offset = location.start_offset

    # Only text strictly before the node decides its position, so the range
    # excludes the character at start_offset itself.
    preceding = source[0...start_offset]
    last_newline = preceding.rindex("\n")

    @lineno = preceding.count("\n") + 1
    # The column is measured from the character after the previous newline,
    # which keeps it 0-based on every line rather than only the first.
    @column = last_newline ? start_offset - last_newline - 1 : start_offset
  end

  # Parses the source on first use and caches the outcome.
  def result
    @result ||= YARP.parse(source)
  end

  # No-op handlers, one per arity found in the Ripper event tables. Subclasses
  # override the on_* methods they care about.
  def _dispatch0; end
  def _dispatch1(_); end
  def _dispatch2(_, _); end
  def _dispatch3(_, _, _); end
  def _dispatch4(_, _, _, _); end
  def _dispatch5(_, _, _, _, _); end
  def _dispatch7(_, _, _, _, _, _, _); end

  (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
    alias :"on_#{event}" :"_dispatch#{arity}"
  end
end
|
174
|
+
end
|