prism 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
# Here we are reopening the prism module to provide methods on nodes that aren't
# templated and are meant as convenience methods.
module Prism
  # Single implementation of #options shared by the regular expression node
  # classes below, so the flag translation is not copied into each class.
  # Including classes must respond to #flags with an Integer bitset.
  module RegularExpressionOptions # :nodoc:
    # Returns a numeric value that represents the flags that were used to create
    # the regular expression.
    #
    # NOTE(review): the first line passes the prism flag bits straight through,
    # so it assumes IGNORE_CASE/EXTENDED/MULTI_LINE share their bit values with
    # the corresponding Regexp option constants — confirm against the flag
    # definitions.
    def options
      o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE)
      o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
      o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
      o
    end
  end

  private_constant :RegularExpressionOptions

  class FloatNode < Node
    # Returns the value of the node as a Ruby Float.
    #
    # Kernel#Float raises ArgumentError when the text cannot be converted.
    def value
      Float(slice)
    end
  end

  class ImaginaryNode < Node
    # Returns the value of the node as a Ruby Complex.
    #
    # The real part is always zero; the imaginary part is whatever the wrapped
    # numeric node reports as its own value.
    def value
      Complex(0, numeric.value)
    end
  end

  class IntegerNode < Node
    # Returns the value of the node as a Ruby Integer.
    #
    # Kernel#Integer raises ArgumentError when the text cannot be converted.
    def value
      Integer(slice)
    end
  end

  class InterpolatedRegularExpressionNode < Node
    include RegularExpressionOptions
  end

  class RationalNode < Node
    # Returns the value of the node as a Ruby Rational.
    #
    # A single trailing "r" is stripped before the text is handed to
    # Kernel#Rational.
    def value
      Rational(slice.chomp("r"))
    end
  end

  class RegularExpressionNode < Node
    include RegularExpressionOptions
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module Prism
  # This object is responsible for generating the output for the inspect method
  # implementations of child nodes.
  #
  # An inspector carries a +prefix+ (the indentation and tree-drawing
  # characters for the current depth) and an +output+ buffer that lines are
  # appended to.
  class NodeInspector
    # Branch pointer and continuation prefix for an entry that has siblings
    # after it. Both are four columns wide so nested lines stay aligned.
    MIDDLE_BRANCH = ["├── ", "│   "].freeze

    # Branch pointer and continuation prefix for the final entry of a list.
    LAST_BRANCH = ["└── ", "    "].freeze

    private_constant :MIDDLE_BRANCH, :LAST_BRANCH

    attr_reader :prefix, :output

    # prefix:: string prepended to every line appended through #<<.
    def initialize(prefix = "")
      @prefix = prefix
      @output = +""
    end

    # Appends a line to the output with the current prefix.
    def <<(line)
      output << "#{prefix}#{line}"
    end

    # This generates a string that is used as the header of the inspect output
    # for any given node. It contains the unqualified class name, the start and
    # end line/column of the node, and a newline marker when node.newline? is
    # truthy.
    def header(node)
      loc = node.location

      text = +"@ #{node.class.name.split("::").last} ("
      text << "location: (#{loc.start_line},#{loc.start_column})-(#{loc.end_line},#{loc.end_column})"
      text << ", newline: true" if node.newline?
      text << ")\n"
      text
    end

    # Generates a string that represents a list of nodes. It handles properly
    # using the box drawing characters to make the output look nice.
    #
    # Each node is inspected with a fresh inspector whose prefix is the
    # continuation prefix; the first occurrence of that prefix in the result is
    # then swapped for the branch pointer so only the first line of each entry
    # shows the branch.
    def list(prefix, nodes)
      rendered = +"(length: #{nodes.length})\n"
      last_index = nodes.length - 1

      nodes.each_with_index do |node, index|
        pointer, continuation = index == last_index ? LAST_BRANCH : MIDDLE_BRANCH
        node_prefix = "#{prefix}#{continuation}"
        rendered << node.inspect(NodeInspector.new(node_prefix)).sub(node_prefix, "#{prefix}#{pointer}")
      end

      rendered
    end

    # Generates a string that represents a location field on a node. A missing
    # (nil/false) location is rendered as the empty-set sign.
    def location(value)
      return "∅" unless value

      "(#{value.start_line},#{value.start_column})-(#{value.end_line},#{value.end_column}) = #{value.slice.inspect}"
    end

    # Generates a string that represents a child node. The child is inspected
    # with a deeper prefix and this inspector's own prefix is then removed from
    # the front, since the caller emits it.
    def child_node(node, append)
      node.inspect(child_inspector(append)).delete_prefix(prefix)
    end

    # Returns a new inspector that can be used to inspect a child node.
    def child_inspector(append)
      NodeInspector.new("#{prefix}#{append}")
    end

    # Returns the output as a string.
    def to_str
      output
    end
  end
end
data/lib/prism/pack.rb ADDED
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
module Prism
  # Vocabulary and value objects describing the directives of a pack template.
  # NOTE(review): presumably these mirror Array#pack / String#unpack format
  # strings — confirm against the code that constructs Directive and Format.
  module Pack
    # Every name listed here becomes a constant whose value is the Symbol of
    # the same name (for example Pack::SPACE == :SPACE).
    %i[
      SPACE
      COMMENT
      INTEGER
      UTF8
      BER
      FLOAT
      STRING_SPACE_PADDED
      STRING_NULL_PADDED
      STRING_NULL_TERMINATED
      STRING_MSB
      STRING_LSB
      STRING_HEX_HIGH
      STRING_HEX_LOW
      STRING_UU
      STRING_MIME
      STRING_BASE64
      STRING_FIXED
      STRING_POINTER
      MOVE
      BACK
      NULL

      UNSIGNED
      SIGNED
      SIGNED_NA

      AGNOSTIC_ENDIAN
      LITTLE_ENDIAN
      BIG_ENDIAN
      NATIVE_ENDIAN
      ENDIAN_NA

      SIZE_SHORT
      SIZE_INT
      SIZE_LONG
      SIZE_LONG_LONG
      SIZE_8
      SIZE_16
      SIZE_32
      SIZE_64
      SIZE_P
      SIZE_NA

      LENGTH_FIXED
      LENGTH_MAX
      LENGTH_RELATIVE
      LENGTH_NA
    ].each do |const|
      const_set(const, const)
    end

    # A single directive of a template, together with the attributes needed to
    # produce a human readable description of it.
    class Directive
      attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length

      # All nine attributes are stored as given and exposed through readers.
      # +type+, +signed+, +endian+, +size+ and +length_type+ are compared
      # against the Pack constants by #describe.
      def initialize(version, variant, source, type, signed, endian, size, length_type, length)
        @version = version
        @variant = variant
        @source = source
        @type = type
        @signed = signed
        @endian = endian
        @size = size
        @length_type = length_type
        @length = length
      end

      ENDIAN_DESCRIPTIONS = {
        AGNOSTIC_ENDIAN: 'agnostic',
        LITTLE_ENDIAN: 'little-endian (VAX)',
        BIG_ENDIAN: 'big-endian (network)',
        NATIVE_ENDIAN: 'native-endian',
        ENDIAN_NA: 'n/a'
      }.freeze

      SIGNED_DESCRIPTIONS = {
        UNSIGNED: 'unsigned',
        SIGNED: 'signed',
        SIGNED_NA: 'n/a'
      }.freeze

      SIZE_DESCRIPTIONS = {
        SIZE_SHORT: 'short',
        SIZE_INT: 'int-width',
        SIZE_LONG: 'long',
        SIZE_LONG_LONG: 'long long',
        SIZE_8: '8-bit',
        SIZE_16: '16-bit',
        SIZE_32: '32-bit',
        SIZE_64: '64-bit',
        SIZE_P: 'pointer-width'
      }.freeze

      # Descriptions for the directive types whose text does not depend on any
      # other attribute. INTEGER and FLOAT are built dynamically in #describe.
      TYPE_DESCRIPTIONS = {
        SPACE: 'whitespace',
        COMMENT: 'comment',
        UTF8: 'UTF-8 character',
        BER: 'BER-compressed integer',
        STRING_SPACE_PADDED: 'arbitrary binary string (space padded)',
        STRING_NULL_PADDED: 'arbitrary binary string (null padded, count is width)',
        STRING_NULL_TERMINATED: 'arbitrary binary string (null padded, count is width), except that null is added with *',
        STRING_MSB: 'bit string (MSB first)',
        STRING_LSB: 'bit string (LSB first)',
        STRING_HEX_HIGH: 'hex string (high nibble first)',
        STRING_HEX_LOW: 'hex string (low nibble first)',
        STRING_UU: 'UU-encoded string',
        STRING_MIME: 'quoted printable, MIME encoding',
        STRING_BASE64: 'base64 encoded string',
        STRING_FIXED: 'pointer to a structure (fixed-length string)',
        STRING_POINTER: 'pointer to a null-terminated string',
        MOVE: 'move to absolute position',
        BACK: 'back up a byte',
        NULL: 'null byte'
      }.freeze

      # Returns a human readable description of this directive.
      #
      # Raises RuntimeError when +type+ is not one of the known directive
      # types.
      def describe
        case type
        when INTEGER
          describe_integer
        when FLOAT
          "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
        else
          TYPE_DESCRIPTIONS.fetch(type) do
            raise "Unknown pack directive type: #{type.inspect}"
          end
        end
      end

      private

      # Builds the description of an INTEGER directive: signedness, size,
      # endianness (omitted for 8-bit values) and a repeat-count suffix.
      def describe_integer
        base =
          if size == SIZE_8
            "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
          else
            "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
          end

        case length_type
        when LENGTH_FIXED
          length > 1 ? base + ", x#{length}" : base
        when LENGTH_MAX
          base + ', as many as possible'
        else
          # Any other length type carries no count to report; describe the
          # integer itself instead of returning nil.
          base
        end
      end
    end

    # An ordered list of directives plus the encoding associated with them.
    class Format
      attr_reader :directives, :encoding

      def initialize(directives, encoding)
        @directives = directives
        @encoding = encoding
      end

      # Returns a multi-line description: one line per directive (its source
      # padded to a common width, followed by Directive#describe) and then the
      # encoding.
      def describe
        # The column width is taken from the inspected form of every source,
        # even though only whitespace directives are printed inspected.
        source_width = directives.map { |directive| directive.source.inspect.length }.max

        directive_lines = directives.map do |directive|
          source = directive.type == SPACE ? directive.source.inspect : directive.source
          " #{source.ljust(source_width)} #{directive.describe}"
        end

        ['Directives:', *directive_lines, 'Encoding:', " #{encoding}"].join("\n")
      end
    end
  end
end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
module Prism
  class ParseResult
    # When we've parsed the source, we have both the syntax tree and the list of
    # comments that we found in the source. This class is responsible for
    # walking the tree and finding the nearest location to attach each comment.
    #
    # It does this by first finding the nearest locations to each comment.
    # Locations can either come from nodes directly or from location fields on
    # nodes. For example, a `ClassNode` has an overall location encompassing the
    # entire class, but it also has a location for the `class` keyword.
    #
    # Once the nearest locations are found, it determines which one to attach
    # to. If it's a trailing comment (a comment on the same line as other source
    # code), it will favor attaching to the nearest location that occurs before
    # the comment. Otherwise it will favor attaching to the nearest location
    # that is after the comment.
    class Comments
      # A target for attaching comments that is based on a specific node's
      # location.
      class NodeTarget
        attr_reader :node

        def initialize(node)
          @node = node
        end

        def start_offset
          node.location.start_offset
        end

        def end_offset
          node.location.end_offset
        end

        # True when the comment lies entirely within this node's offsets, in
        # which case the search descends into the node.
        def encloses?(comment)
          start_offset <= comment.location.start_offset &&
            comment.location.end_offset <= end_offset
        end

        # Attaches the comment to the node's location.
        def <<(comment)
          node.location.comments << comment
        end
      end

      # A target for attaching comments that is based on a location field on a
      # node. For example, the `end` token of a ClassNode.
      class LocationTarget
        attr_reader :location

        def initialize(location)
          @location = location
        end

        def start_offset
          location.start_offset
        end

        def end_offset
          location.end_offset
        end

        # Always false: a location target has no node to descend into, so the
        # search never recurses through it.
        def encloses?(comment)
          false
        end

        # Attaches the comment to the location.
        def <<(comment)
          location.comments << comment
        end
      end

      attr_reader :parse_result

      def initialize(parse_result)
        @parse_result = parse_result
      end

      # Attaches every comment of the parse result to the target chosen for it.
      def attach!
        parse_result.comments.each do |comment|
          preceding, enclosing, following = nearest_targets(parse_result.value, comment)
          target =
            if comment.trailing?
              preceding || following || enclosing || NodeTarget.new(parse_result.value)
            else
              # If a comment exists on its own line, prefer a leading comment.
              following || preceding || enclosing || NodeTarget.new(parse_result.value)
            end

          target << comment
        end
      end

      private

      # Builds the list of targets directly beneath the given node, ordered by
      # start offset. The children of a StatementsNode are used in place of
      # the StatementsNode itself. Values that are neither nodes nor locations
      # are ignored.
      def sorted_targets(node)
        targets = []

        node.comment_targets.each do |value|
          case value
          when StatementsNode
            value.body.each { |statement| targets << NodeTarget.new(statement) }
          when Node
            targets << NodeTarget.new(value)
          when Location
            targets << LocationTarget.new(value)
          end
        end

        targets.sort_by!(&:start_offset)
      end

      # Responsible for finding the nearest targets to the given comment within
      # the context of the given encapsulating node.
      #
      # Returns a three element array: the closest target ending at or before
      # the comment (or nil), a NodeTarget for the innermost node that contains
      # the comment, and the closest target starting at or after the comment
      # (or nil).
      def nearest_targets(node, comment)
        comment_start = comment.location.start_offset
        comment_end = comment.location.end_offset

        targets = sorted_targets(node)
        preceding = nil
        following = nil

        left = 0
        right = targets.length

        # This is a custom binary search that finds the nearest nodes to the
        # given comment. When it finds a node that completely encapsulates the
        # comment, it recurses downward into the tree.
        while left < right
          middle = (left + right) / 2
          target = targets[middle]

          if target.encloses?(comment)
            # The comment is completely contained by this target. Abandon the
            # binary search at this level.
            return nearest_targets(target.node, comment)
          elsif target.end_offset <= comment_start
            # This target falls completely before the comment. Because we will
            # never consider this target or any targets before it again, this
            # target must be the closest preceding target we have encountered so
            # far.
            preceding = target
            left = middle + 1
          elsif comment_end <= target.start_offset
            # This target falls completely after the comment. Because we will
            # never consider this target or any targets after it again, this
            # target must be the closest following target we have encountered so
            # far.
            following = target
            right = middle
          else
            # This should only happen if there is a bug in this parser.
            raise "Comment location overlaps with a target location"
          end
        end

        [preceding, NodeTarget.new(node), following]
      end
    end

    private_constant :Comments

    # Attach the list of comments to their respective locations in the tree.
    def attach_comments!
      Comments.new(self).attach!
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
module Prism
  class ParseResult
    # The :line tracepoint event gets fired whenever the Ruby VM encounters an
    # expression on a new line. The types of expressions that can trigger this
    # event are:
    #
    # * if statements
    # * unless statements
    # * nodes that are children of statements lists
    #
    # In order to keep track of the newlines, we have a list of offsets that
    # come back from the parser. We assign these offsets to the first nodes that
    # we find in the tree that are on those lines.
    #
    # Note that the logic in this file should be kept in sync with the Java
    # MarkNewlinesVisitor, since that visitor is responsible for marking the
    # newlines for JRuby/TruffleRuby.
    class Newlines < Visitor
      # newline_marked:: array of booleans that is handed to every
      #                  set_newline_flag call made during the walk.
      def initialize(newline_marked)
        @newline_marked = newline_marked
      end

      # Visits the node with a fresh, all-false marker array of the same size
      # and puts the previous array back afterwards, even when the visit
      # raises.
      def visit_block_node(node)
        saved_marks = @newline_marked
        @newline_marked = Array.new(saved_marks.size, false)
        super(node)
      ensure
        @newline_marked = saved_marks
      end

      alias_method :visit_lambda_node, :visit_block_node

      # Flags the node itself before continuing into its children.
      def visit_if_node(node)
        node.set_newline_flag(@newline_marked)
        super(node)
      end

      alias_method :visit_unless_node, :visit_if_node

      # Flags every direct child of the statements list before continuing into
      # the children.
      def visit_statements_node(node)
        node.body.each { |statement| statement.set_newline_flag(@newline_marked) }
        super(node)
      end
    end

    private_constant :Newlines

    # Walk the tree and mark nodes that are on a new line.
    def mark_newlines!
      marks = Array.new(1 + source.offsets.size, false)
      value.accept(Newlines.new(marks))
    end
  end
end