yarp 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
data/lib/yarp/pack.rb ADDED
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
module YARP
  # Symbolic vocabulary and small value objects used to describe the
  # directives of a pack/unpack template.
  module Pack
    # Each name listed here becomes a constant on this module whose value is
    # the symbol of the same name (for example, SPACE == :SPACE). The blank
    # lines separate the groups: directive types, signedness, endianness,
    # integer sizes, and length types.
    %i[
      SPACE
      COMMENT
      INTEGER
      UTF8
      BER
      FLOAT
      STRING_SPACE_PADDED
      STRING_NULL_PADDED
      STRING_NULL_TERMINATED
      STRING_MSB
      STRING_LSB
      STRING_HEX_HIGH
      STRING_HEX_LOW
      STRING_UU
      STRING_MIME
      STRING_BASE64
      STRING_FIXED
      STRING_POINTER
      MOVE
      BACK
      NULL

      UNSIGNED
      SIGNED
      SIGNED_NA

      AGNOSTIC_ENDIAN
      LITTLE_ENDIAN
      BIG_ENDIAN
      NATIVE_ENDIAN
      ENDIAN_NA

      SIZE_SHORT
      SIZE_INT
      SIZE_LONG
      SIZE_LONG_LONG
      SIZE_8
      SIZE_16
      SIZE_32
      SIZE_64
      SIZE_P
      SIZE_NA

      LENGTH_FIXED
      LENGTH_MAX
      LENGTH_RELATIVE
      LENGTH_NA
    ].each do |const|
      const_set(const, const)
    end

    # A single directive of a template. It is a plain value object: every
    # constructor argument is stored unchanged and exposed through a reader.
    class Directive
      attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length

      # type, signed, endian, size and length_type are compared against the
      # symbolic constants defined on Pack; source is the text that Format
      # prints next to the description.
      def initialize(version, variant, source, type, signed, endian, size, length_type, length)
        @version = version
        @variant = variant
        @source = source
        @type = type
        @signed = signed
        @endian = endian
        @size = size
        @length_type = length_type
        @length = length
      end

      # Human-readable text for each endianness constant. Frozen so that the
      # shared lookup table cannot be mutated by accident.
      ENDIAN_DESCRIPTIONS = {
        AGNOSTIC_ENDIAN: 'agnostic',
        LITTLE_ENDIAN: 'little-endian (VAX)',
        BIG_ENDIAN: 'big-endian (network)',
        NATIVE_ENDIAN: 'native-endian',
        ENDIAN_NA: 'n/a'
      }.freeze

      # Human-readable text for each signedness constant.
      SIGNED_DESCRIPTIONS = {
        UNSIGNED: 'unsigned',
        SIGNED: 'signed',
        SIGNED_NA: 'n/a'
      }.freeze

      # Human-readable text for each integer size constant. SIZE_NA has no
      # entry, so looking it up yields nil.
      SIZE_DESCRIPTIONS = {
        SIZE_SHORT: 'short',
        SIZE_INT: 'int-width',
        SIZE_LONG: 'long',
        SIZE_LONG_LONG: 'long long',
        SIZE_8: '8-bit',
        SIZE_16: '16-bit',
        SIZE_32: '32-bit',
        SIZE_64: '64-bit',
        SIZE_P: 'pointer-width'
      }.freeze

      # Returns a human-readable description of this directive as a String.
      #
      # Raises RuntimeError when the directive's type is not one of the type
      # constants defined on Pack.
      def describe
        case type
        when SPACE
          'whitespace'
        when COMMENT
          'comment'
        when INTEGER
          # Endianness is left out of the description of 8-bit integers.
          base =
            if size == SIZE_8
              "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
            else
              "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
            end

          case length_type
          when LENGTH_FIXED
            length > 1 ? "#{base}, x#{length}" : base
          when LENGTH_MAX
            "#{base}, as many as possible"
          else
            # Any other length type still yields the base description rather
            # than nil, so callers always get a String back.
            base
          end
        when UTF8
          'UTF-8 character'
        when BER
          'BER-compressed integer'
        when FLOAT
          "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
        when STRING_SPACE_PADDED
          'arbitrary binary string (space padded)'
        when STRING_NULL_PADDED
          'arbitrary binary string (null padded, count is width)'
        when STRING_NULL_TERMINATED
          'arbitrary binary string (null padded, count is width), except that null is added with *'
        when STRING_MSB
          'bit string (MSB first)'
        when STRING_LSB
          'bit string (LSB first)'
        when STRING_HEX_HIGH
          'hex string (high nibble first)'
        when STRING_HEX_LOW
          'hex string (low nibble first)'
        when STRING_UU
          'UU-encoded string'
        when STRING_MIME
          'quoted printable, MIME encoding'
        when STRING_BASE64
          'base64 encoded string'
        when STRING_FIXED
          'pointer to a structure (fixed-length string)'
        when STRING_POINTER
          'pointer to a null-terminated string'
        when MOVE
          'move to absolute position'
        when BACK
          'back up a byte'
        when NULL
          'null byte'
        else
          # Same exception class as a bare `raise`, but with a message that
          # identifies the offending type.
          raise "Unknown pack directive type: #{type.inspect}"
        end
      end
    end

    # An ordered list of directives together with an encoding.
    class Format
      attr_reader :directives, :encoding

      def initialize(directives, encoding)
        @directives = directives
        @encoding = encoding
      end

      # Returns a multi-line String listing each directive's source next to
      # its description, followed by the encoding.
      def describe
        # The column width is taken from the inspected form of every source,
        # so it is wide enough whether or not a given source is printed
        # inspected below.
        source_width = directives.map { |d| d.source.inspect.length }.max
        directive_lines = directives.map do |directive|
          # Whitespace directives are shown inspected so that they stay
          # visible in the output.
          source = directive.type == SPACE ? directive.source.inspect : directive.source
          " #{source.ljust(source_width)} #{directive.describe}"
        end

        (['Directives:'] + directive_lines + ['Encoding:', " #{encoding}"]).join("\n")
      end
    end
  end
end
data/lib/yarp/ripper_compat.rb ADDED
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ripper"
4
+
5
module YARP
  # This class is meant to provide a compatibility layer between YARP and
  # Ripper. It functions by parsing the entire tree first and then walking it
  # and executing each of the Ripper callbacks as it goes.
  #
  # This class is going to necessarily be slower than the native Ripper API. It
  # is meant as a stopgap until developers migrate to using YARP. It is also
  # meant as a test harness for the YARP parser.
  class RipperCompat
    # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
    # returns the arrays of [type, *children].
    class SexpBuilder < RipperCompat
      private

      # Every parser event handler returns [event, *args].
      Ripper::PARSER_EVENTS.each do |event|
        define_method(:"on_#{event}") do |*args|
          [event, *args]
        end
      end

      # Every scanner event handler returns [:@event, value, [lineno, column]],
      # using the position most recently recorded by #bounds.
      Ripper::SCANNER_EVENTS.each do |event|
        define_method(:"on_#{event}") do |value|
          [:"@#{event}", value, [lineno, column]]
        end
      end
    end

    # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
    # returns the same values as ::Ripper::SexpBuilder except with a couple of
    # niceties that flatten linked lists into arrays.
    class SexpBuilderPP < SexpBuilder
      private

      # Handler shared by the zero-arity *_new events: starts an empty list.
      def _dispatch_event_new
        []
      end

      # Handler shared by the *_add events: appends item and returns the list.
      def _dispatch_event_push(list, item)
        list << item
        list
      end

      Ripper::PARSER_EVENT_TABLE.each do |event, arity|
        case event
        when /_new\z/
          alias :"on_#{event}" :_dispatch_event_new if arity == 0
        when /_add\z/
          alias :"on_#{event}" :_dispatch_event_push
        end
      end
    end

    attr_reader :source, :lineno, :column

    # source is the Ruby source text to parse. Parsing is deferred until the
    # result is first needed.
    def initialize(source)
      @source = source
      @result = nil
      @lineno = nil
      @column = nil
    end

    ############################################################################
    # Public interface
    ############################################################################

    # True if the parse result reports any errors.
    def error?
      result.errors.any?
    end

    # Walks the parsed tree with this object as the visitor and returns
    # whatever the root node's visit returns. Returns nil if there were parse
    # errors.
    def parse
      result.value.accept(self) unless error?
    end

    ############################################################################
    # Visitor methods
    ############################################################################

    # Visits node, or returns nil when node is nil.
    def visit(node)
      node&.accept(self)
    end

    # Only calls that have a receiver, no opening location and exactly one
    # argument are supported; they are dispatched as binary operations. Every
    # other shape, including a call with no receiver or no arguments at all,
    # raises NotImplementedError.
    def visit_call_node(node)
      arguments = node.arguments&.arguments
      unless node.receiver && !node.opening_loc && arguments&.length == 1
        raise NotImplementedError
      end

      bounds(node.receiver.location)
      left = visit(node.receiver)

      bounds(arguments.first.location)
      right = visit(arguments.first)

      message_loc = node.message_loc
      on_binary(left, source[message_loc.start_offset...message_loc.end_offset].to_sym, right)
    end

    # Dispatches on_int with the slice of source covered by the node.
    def visit_integer_node(node)
      bounds(node.location)
      on_int(source[node.location.start_offset...node.location.end_offset])
    end

    # Folds every statement of the body into the list started by on_stmts_new.
    def visit_statements_node(node)
      bounds(node.location)
      node.body.inject(on_stmts_new) do |stmts, stmt|
        on_stmts_add(stmts, visit(stmt))
      end
    end

    # Only the :MINUS and :PLUS token types are supported; both dispatch on_op.
    def visit_token(node)
      bounds(node.location)

      case node.type
      when :MINUS, :PLUS
        on_op(node.value)
      else
        raise NotImplementedError, "Unknown token: #{node.type}"
      end
    end

    # Dispatches on_program with the visited statements of the program.
    def visit_program_node(node)
      bounds(node.location)
      on_program(visit(node.statements))
    end

    ############################################################################
    # Entrypoints for subclasses
    ############################################################################

    # This is a convenience method that runs the SexpBuilder subclass parser.
    def self.sexp_raw(source)
      SexpBuilder.new(source).parse
    end

    # This is a convenience method that runs the SexpBuilderPP subclass parser.
    def self.sexp(source)
      SexpBuilderPP.new(source).parse
    end

    private

    # This method is responsible for updating lineno and column information
    # to reflect the current node. lineno is 1-based and column is 0-based,
    # counted from the first character after the preceding newline.
    #
    # This method could be drastically improved with some caching on the start
    # of every line, but for now it's good enough.
    #
    # NOTE(review): start_offset is used as a character index into source. If
    # YARP reports byte offsets, this drifts on multibyte input -- confirm.
    def bounds(location)
      start_offset = location.start_offset

      # Only the text strictly before the offset decides the position.
      preceding = source[0...start_offset]
      last_newline = preceding.rindex("\n")

      @lineno = preceding.count("\n") + 1
      @column = last_newline ? start_offset - last_newline - 1 : start_offset
    end

    # The memoized result of parsing source with YARP.
    def result
      @result ||= YARP.parse(source)
    end

    # No-op handlers, one per arity, that the default on_* aliases point at.
    def _dispatch0; end
    def _dispatch1(_); end
    def _dispatch2(_, _); end
    def _dispatch3(_, _, _); end
    def _dispatch4(_, _, _, _); end
    def _dispatch5(_, _, _, _, _); end
    # Defined defensively so that an event of arity 6 could not break the
    # alias loop below.
    def _dispatch6(_, _, _, _, _, _); end
    def _dispatch7(_, _, _, _, _, _, _); end

    (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
      alias :"on_#{event}" :"_dispatch#{arity}"
    end
  end
end