yarp 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
data/lib/yarp/pack.rb ADDED
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ module YARP
4
# Vocabulary for describing the directives of a pack/unpack template: a table
# of symbolic constants plus the Directive and Format value objects that turn
# them into human-readable text.
module Pack
  # Each name listed here is defined as a constant on this module whose value
  # is the symbol of the same name (so Pack::SPACE == :SPACE).
  %i[
    SPACE
    COMMENT
    INTEGER
    UTF8
    BER
    FLOAT
    STRING_SPACE_PADDED
    STRING_NULL_PADDED
    STRING_NULL_TERMINATED
    STRING_MSB
    STRING_LSB
    STRING_HEX_HIGH
    STRING_HEX_LOW
    STRING_UU
    STRING_MIME
    STRING_BASE64
    STRING_FIXED
    STRING_POINTER
    MOVE
    BACK
    NULL

    UNSIGNED
    SIGNED
    SIGNED_NA

    AGNOSTIC_ENDIAN
    LITTLE_ENDIAN
    BIG_ENDIAN
    NATIVE_ENDIAN
    ENDIAN_NA

    SIZE_SHORT
    SIZE_INT
    SIZE_LONG
    SIZE_LONG_LONG
    SIZE_8
    SIZE_16
    SIZE_32
    SIZE_64
    SIZE_P
    SIZE_NA

    LENGTH_FIXED
    LENGTH_MAX
    LENGTH_RELATIVE
    LENGTH_NA
  ].each do |const|
    const_set(const, const)
  end

  # One directive of a template. It is a plain value object: every constructor
  # argument is stored untouched and exposed through a reader of the same name.
  # The type, signed, endian, size and length_type fields are compared against
  # the Pack constants above.
  class Directive
    attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length

    def initialize(version, variant, source, type, signed, endian, size, length_type, length)
      @version = version
      @variant = variant
      @source = source
      @type = type
      @signed = signed
      @endian = endian
      @size = size
      @length_type = length_type
      @length = length
    end

    # Text used for each endianness constant.
    ENDIAN_DESCRIPTIONS = {
      AGNOSTIC_ENDIAN: 'agnostic',
      LITTLE_ENDIAN: 'little-endian (VAX)',
      BIG_ENDIAN: 'big-endian (network)',
      NATIVE_ENDIAN: 'native-endian',
      ENDIAN_NA: 'n/a'
    }.freeze

    # Text used for each signedness constant.
    SIGNED_DESCRIPTIONS = {
      UNSIGNED: 'unsigned',
      SIGNED: 'signed',
      SIGNED_NA: 'n/a'
    }.freeze

    # Text used for each size constant. SIZE_NA is included so that this table
    # covers every size constant, like the signedness and endianness tables.
    SIZE_DESCRIPTIONS = {
      SIZE_SHORT: 'short',
      SIZE_INT: 'int-width',
      SIZE_LONG: 'long',
      SIZE_LONG_LONG: 'long long',
      SIZE_8: '8-bit',
      SIZE_16: '16-bit',
      SIZE_32: '32-bit',
      SIZE_64: '64-bit',
      SIZE_P: 'pointer-width',
      SIZE_NA: 'n/a'
    }.freeze

    # Returns a one-line English description of this directive, chosen by its
    # type. Raises a RuntimeError naming the type when it is not one of the
    # Pack type constants.
    def describe
      case type
      when SPACE then 'whitespace'
      when COMMENT then 'comment'
      when INTEGER then describe_integer
      when UTF8 then 'UTF-8 character'
      when BER then 'BER-compressed integer'
      when FLOAT then "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
      when STRING_SPACE_PADDED then 'arbitrary binary string (space padded)'
      when STRING_NULL_PADDED then 'arbitrary binary string (null padded, count is width)'
      when STRING_NULL_TERMINATED
        'arbitrary binary string (null padded, count is width), except that null is added with *'
      when STRING_MSB then 'bit string (MSB first)'
      when STRING_LSB then 'bit string (LSB first)'
      when STRING_HEX_HIGH then 'hex string (high nibble first)'
      when STRING_HEX_LOW then 'hex string (low nibble first)'
      when STRING_UU then 'UU-encoded string'
      when STRING_MIME then 'quoted printable, MIME encoding'
      when STRING_BASE64 then 'base64 encoded string'
      when STRING_FIXED then 'pointer to a structure (fixed-length string)'
      when STRING_POINTER then 'pointer to a null-terminated string'
      when MOVE then 'move to absolute position'
      when BACK then 'back up a byte'
      when NULL then 'null byte'
      else
        # Still a RuntimeError (as the previous bare `raise` was), but with a
        # message that says which type was not recognised.
        raise "unknown pack directive type: #{type.inspect}"
      end
    end

    private

    # Builds the description of an INTEGER directive: signedness, size and
    # (for anything wider than 8 bits) endianness, followed by a repeat suffix
    # that depends on the length type.
    def describe_integer
      base =
        if size == SIZE_8
          # Byte order is meaningless for a single byte, so it is left out.
          "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
        else
          "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
        end

      case length_type
      when LENGTH_FIXED
        length > 1 ? "#{base}, x#{length}" : base
      when LENGTH_MAX
        "#{base}, as many as possible"
      else
        # Any other length type adds nothing; return the base text rather
        # than nil so callers always receive a String.
        base
      end
    end
  end

  # An ordered list of directives together with an encoding value.
  class Format
    attr_reader :directives, :encoding

    def initialize(directives, encoding)
      @directives = directives
      @encoding = encoding
    end

    # Returns a multi-line report: a "Directives:" heading, one line per
    # directive (its source padded to a common width, then its description),
    # an "Encoding:" heading and the encoding itself.
    def describe
      # The column width is taken from the inspected form of every source,
      # which is the widest form any row can be shown in.
      source_width = directives.map { |d| d.source.inspect.length }.max

      directive_lines = directives.map do |directive|
        # Whitespace would be invisible if printed raw, so it is shown quoted.
        shown = directive.type == SPACE ? directive.source.inspect : directive.source
        " #{shown.ljust(source_width)} #{directive.describe}"
      end

      ['Directives:', *directive_lines, 'Encoding:', " #{encoding}"].join("\n")
    end
  end
end
185
+ end
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ripper"
4
+
5
+ module YARP
6
+ # This class is meant to provide a compatibility layer between YARP and
7
+ # Ripper. It functions by parsing the entire tree first and then walking it
8
+ # and executing each of the Ripper callbacks as it goes.
9
+ #
10
+ # This class is going to necessarily be slower than the native Ripper API. It
11
+ # is meant as a stopgap until developers migrate to using YARP. It is also
12
+ # meant as a test harness for the YARP parser.
13
# Walks a tree produced by YARP.parse and fires Ripper-style on_* callbacks,
# so that subclasses can build the same structures Ripper subclasses build.
# Only a small set of node kinds is handled; everything else raises
# NotImplementedError.
class RipperCompat
  # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
  # returns the arrays of [type, *children].
  class SexpBuilder < RipperCompat
    private

    # Parser events collect their arguments behind the event name.
    Ripper::PARSER_EVENTS.each do |event|
      define_method(:"on_#{event}") do |*args|
        [event, *args]
      end
    end

    # Scanner events record the token text and the current position.
    Ripper::SCANNER_EVENTS.each do |event|
      define_method(:"on_#{event}") do |value|
        [:"@#{event}", value, [lineno, column]]
      end
    end
  end

  # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
  # returns the same values as ::Ripper::SexpBuilder except with a couple of
  # niceties that flatten linked lists into arrays.
  class SexpBuilderPP < SexpBuilder
    private

    # Handler for zero-argument *_new events: start an empty list.
    def _dispatch_event_new
      []
    end

    # Handler for *_add events: append the item and hand the list back.
    def _dispatch_event_push(list, item)
      list << item
      list
    end

    Ripper::PARSER_EVENT_TABLE.each do |event, arity|
      case event
      when /_new\z/
        alias :"on_#{event}" :_dispatch_event_new if arity == 0
      when /_add\z/
        alias :"on_#{event}" :_dispatch_event_push
      end
    end
  end

  attr_reader :source, :lineno, :column

  # source is the Ruby code to parse. Parsing is deferred until the result is
  # first needed; lineno and column stay nil until a node has been visited.
  def initialize(source)
    @source = source
    @result = nil
    @lineno = nil
    @column = nil
  end

  ############################################################################
  # Public interface
  ############################################################################

  # True when the parse result reports at least one error.
  def error?
    result.errors.any?
  end

  # Visits the parsed tree and returns whatever the root visit returns, or
  # nil when the source had errors.
  def parse
    result.value.accept(self) unless error?
  end

  ############################################################################
  # Visitor methods
  ############################################################################

  # Dispatches to the node's accept method; a nil node yields nil.
  def visit(node)
    node&.accept(self)
  end

  # Only one call shape is supported: a receiver, no opening parenthesis and
  # exactly one argument, which is reported as a binary operation. Any other
  # call (including one without arguments or without a receiver) raises
  # NotImplementedError.
  def visit_call_node(node)
    arguments = node.arguments&.arguments
    binary = node.receiver && !node.opening_loc && arguments&.length == 1
    raise NotImplementedError unless binary

    bounds(node.receiver.location)
    left = visit(node.receiver)

    bounds(arguments.first.location)
    right = visit(arguments.first)

    on_binary(left, slice(node.message_loc).to_sym, right)
  end

  # Reports the integer's source text through on_int.
  def visit_integer_node(node)
    bounds(node.location)
    on_int(slice(node.location))
  end

  # Folds every statement of the body into a list, starting from
  # on_stmts_new and extending it with on_stmts_add.
  def visit_statements_node(node)
    bounds(node.location)
    node.body.inject(on_stmts_new) do |stmts, stmt|
      on_stmts_add(stmts, visit(stmt))
    end
  end

  # Reports + and - tokens through on_op; other token types are unsupported.
  def visit_token(node)
    bounds(node.location)

    case node.type
    when :MINUS, :PLUS
      on_op(node.value)
    else
      raise NotImplementedError, "Unknown token: #{node.type}"
    end
  end

  # Wraps the visited statements in on_program.
  def visit_program_node(node)
    bounds(node.location)
    on_program(visit(node.statements))
  end

  ############################################################################
  # Entrypoints for subclasses
  ############################################################################

  # This is a convenience method that runs the SexpBuilder subclass parser.
  def self.sexp_raw(source)
    SexpBuilder.new(source).parse
  end

  # This is a convenience method that runs the SexpBuilderPP subclass parser.
  def self.sexp(source)
    SexpBuilderPP.new(source).parse
  end

  private

  # Updates lineno and column to the start of the given location: lineno is
  # 1-based, column is the 0-based byte distance from the start of the line.
  #
  # NOTE(review): offsets are treated as byte offsets into source; confirm
  # against YARP's Location before relying on this for multi-byte input.
  #
  # This method could be drastically improved with some caching on the start
  # of every line, but for now it's good enough.
  def bounds(location)
    start_offset = location.start_offset

    # A binary copy makes string indexes equal byte indexes, and it covers
    # only what comes strictly before the location.
    preceding = source.byteslice(0, start_offset).b
    last_newline = preceding.rindex("\n")

    @lineno = preceding.count("\n") + 1
    # The line begins one byte after the newline, hence the extra -1.
    @column = last_newline ? start_offset - last_newline - 1 : start_offset
  end

  # Returns the text covered by the location, slicing by bytes.
  def slice(location)
    source.byteslice(location.start_offset, location.end_offset - location.start_offset)
  end

  # Memoized parse of the source.
  def result
    @result ||= YARP.parse(source)
  end

  # Default no-op handlers, one per arity, that every on_* event is aliased
  # to below. Subclasses override the events they care about.
  def _dispatch0; end
  def _dispatch1(_); end
  def _dispatch2(_, _); end
  def _dispatch3(_, _, _); end
  def _dispatch4(_, _, _, _); end
  def _dispatch5(_, _, _, _, _); end
  def _dispatch6(_, _, _, _, _, _); end
  def _dispatch7(_, _, _, _, _, _, _); end

  (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
    alias :"on_#{event}" :"_dispatch#{arity}"
  end
end
174
+ end