prism 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Here we are reopening the prism module to provide methods on nodes that aren't
4
+ # templated and are meant as convenience methods.
5
+ module Prism
6
+ class FloatNode < Node
7
# Converts the source text of this float literal into a Ruby Float.
#
# The conversion is strict (Kernel#Float), so text that is not a valid float
# raises instead of silently becoming 0.0.
def value
  literal = slice
  Float(literal)
end
11
+ end
12
+
13
+ class ImaginaryNode < Node
14
# Builds the Ruby Complex for this imaginary literal: the real part is always
# zero and the imaginary part is the value of the wrapped numeric node.
def value
  imaginary_part = numeric.value
  Complex(0, imaginary_part)
end
18
+ end
19
+
20
+ class IntegerNode < Node
21
# Converts the source text of this integer literal into a Ruby Integer.
#
# Kernel#Integer is used so that radix prefixes and underscores in the
# source text are honoured and invalid text raises.
def value
  literal = slice
  Integer(literal)
end
25
+ end
26
+
27
+ class InterpolatedRegularExpressionNode < Node
28
# Returns an Integer describing the options this regular expression was
# written with.
#
# The ignore-case, extended and multi-line bits of +flags+ are passed through
# unchanged (assumes those flag bits share their values with Regexp's option
# bits -- TODO confirm against RegularExpressionFlags). An explicit EUC-JP,
# Windows-31J or UTF-8 flag adds Regexp::FIXEDENCODING, and the ASCII-8BIT
# flag adds Regexp::NOENCODING.
def options
  syntax_mask =
    RegularExpressionFlags::IGNORE_CASE |
    RegularExpressionFlags::EXTENDED |
    RegularExpressionFlags::MULTI_LINE
  fixed_encoding_mask =
    RegularExpressionFlags::EUC_JP |
    RegularExpressionFlags::WINDOWS_31J |
    RegularExpressionFlags::UTF_8

  result = flags & syntax_mask
  result |= Regexp::FIXEDENCODING unless (flags & fixed_encoding_mask).zero?
  result |= Regexp::NOENCODING unless (flags & RegularExpressionFlags::ASCII_8BIT).zero?
  result
end
36
+ end
37
+
38
+ class RationalNode < Node
39
# Returns the value of the node as a Ruby Rational.
#
# The trailing "r" suffix is removed from the source text first. When what
# remains is an integer literal it is converted with Kernel#Integer so that
# radix prefixes are honoured: Ruby evaluates `017r` as (15/1) and `0x10r`
# as (16/1), whereas Rational("017") is (17/1) and Rational("0x10") raises.
# Anything else (for example "1.5") is handed to Kernel#Rational as text so
# that decimal fractions stay exact.
def value
  digits = slice.chomp("r")
  integer = Integer(digits, exception: false)
  integer ? Rational(integer) : Rational(digits)
end
43
+ end
44
+
45
+ class RegularExpressionNode < Node
46
# Returns an Integer describing the options this regular expression was
# written with.
#
# The ignore-case, extended and multi-line bits of +flags+ are passed through
# unchanged (assumes those flag bits share their values with Regexp's option
# bits -- TODO confirm against RegularExpressionFlags). An explicit EUC-JP,
# Windows-31J or UTF-8 flag adds Regexp::FIXEDENCODING, and the ASCII-8BIT
# flag adds Regexp::NOENCODING.
def options
  passthrough = [
    RegularExpressionFlags::IGNORE_CASE,
    RegularExpressionFlags::EXTENDED,
    RegularExpressionFlags::MULTI_LINE
  ].reduce(:|)
  fixed_encodings = [
    RegularExpressionFlags::EUC_JP,
    RegularExpressionFlags::WINDOWS_31J,
    RegularExpressionFlags::UTF_8
  ].reduce(:|)

  bits = flags & passthrough
  bits |= Regexp::FIXEDENCODING unless (flags & fixed_encodings).zero?
  bits |= Regexp::NOENCODING unless (flags & RegularExpressionFlags::ASCII_8BIT).zero?
  bits
end
54
+ end
55
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
# This object is responsible for generating the output for the inspect method
# implementations of child nodes. It accumulates text in +output+ and knows
# the +prefix+ that every line written through #<< must start with.
class NodeInspector
  attr_reader :prefix, :output

  # prefix - the string prepended to every line appended with #<<.
  def initialize(prefix = "")
    @prefix = prefix
    @output = +""
  end

  # Appends a line to the output with the current prefix.
  def <<(line)
    output << "#{prefix}#{line}"
  end

  # This generates a string that is used as the header of the inspect output
  # for any given node: the unqualified class name, the source range and,
  # when the node reports newline?, a newline marker.
  def header(node)
    loc = node.location
    text = +"@ #{node.class.name.split("::").last} ("
    text << "location: (#{loc.start_line},#{loc.start_column})-(#{loc.end_line},#{loc.end_column})"
    text << ", newline: true" if node.newline?
    text << ")\n"
    text
  end

  # Generates a string that represents a list of nodes. It handles properly
  # using the box drawing characters to make the output look nice.
  def list(prefix, nodes)
    text = +"(length: #{nodes.length})\n"
    last_index = nodes.length - 1

    nodes.each_with_index do |node, index|
      # The continuation prefix must be exactly as wide as the branch pointer
      # (four columns); otherwise every line after a child's header would be
      # misaligned relative to that header.
      pointer, continuation = index == last_index ? ["└── ", "    "] : ["├── ", "│   "]
      node_prefix = "#{prefix}#{continuation}"

      # The child renders all of its lines with the continuation prefix; only
      # the first one (its header) is swapped for the branch pointer. The
      # block form keeps the replacement literal (no backslash expansion).
      rendered = node.inspect(NodeInspector.new(node_prefix))
      text << rendered.sub(node_prefix) { "#{prefix}#{pointer}" }
    end

    text
  end

  # Generates a string that represents a location field on a node, or the
  # empty-set sign when the location is absent.
  def location(value)
    return "∅" unless value

    "(#{value.start_line},#{value.start_column})-(#{value.end_line},#{value.end_column}) = #{value.slice.inspect}"
  end

  # Generates a string that represents a child node. The child is rendered
  # with this inspector's prefix plus +append+, and this inspector's own
  # prefix is then stripped from the front so the result can be passed to #<<.
  def child_node(node, append)
    node.inspect(child_inspector(append)).delete_prefix(prefix)
  end

  # Returns a new inspector that can be used to inspect a child node.
  def child_inspector(append)
    NodeInspector.new("#{prefix}#{append}")
  end

  # Returns the output as a string.
  def to_str
    output
  end
end
68
+ end
data/lib/prism/pack.rb ADDED
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ module Pack
5
# Each name below becomes a constant on the enclosing module whose value is
# the symbol of the same name (for example, SPACE == :SPACE). The names are
# grouped only for readability; they are defined in the order listed.
directive_types = %i[
  SPACE
  COMMENT
  INTEGER
  UTF8
  BER
  FLOAT
  STRING_SPACE_PADDED
  STRING_NULL_PADDED
  STRING_NULL_TERMINATED
  STRING_MSB
  STRING_LSB
  STRING_HEX_HIGH
  STRING_HEX_LOW
  STRING_UU
  STRING_MIME
  STRING_BASE64
  STRING_FIXED
  STRING_POINTER
  MOVE
  BACK
  NULL
]
signedness = %i[UNSIGNED SIGNED SIGNED_NA]
endianness = %i[AGNOSTIC_ENDIAN LITTLE_ENDIAN BIG_ENDIAN NATIVE_ENDIAN ENDIAN_NA]
sizes = %i[
  SIZE_SHORT
  SIZE_INT
  SIZE_LONG
  SIZE_LONG_LONG
  SIZE_8
  SIZE_16
  SIZE_32
  SIZE_64
  SIZE_P
  SIZE_NA
]
length_types = %i[LENGTH_FIXED LENGTH_MAX LENGTH_RELATIVE LENGTH_NA]

[directive_types, signedness, endianness, sizes, length_types].each do |group|
  group.each { |name| const_set(name, name) }
end
56
+
57
# A single directive of a pack template. It stores the nine attributes it is
# constructed with and can render a human-readable description of itself.
class Directive
  attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length

  # All arguments are stored as-is and exposed through readers of the same
  # name. +type+, +signed+, +endian+, +size+ and +length_type+ are compared
  # against the symbolic constants of the enclosing module in #describe.
  def initialize(version, variant, source, type, signed, endian, size, length_type, length)
    @version = version
    @variant = variant
    @source = source
    @type = type
    @signed = signed
    @endian = endian
    @size = size
    @length_type = length_type
    @length = length
  end

  # Human-readable names for each endianness constant.
  ENDIAN_DESCRIPTIONS = {
    AGNOSTIC_ENDIAN: 'agnostic',
    LITTLE_ENDIAN: 'little-endian (VAX)',
    BIG_ENDIAN: 'big-endian (network)',
    NATIVE_ENDIAN: 'native-endian',
    ENDIAN_NA: 'n/a'
  }.freeze

  # Human-readable names for each signedness constant.
  SIGNED_DESCRIPTIONS = {
    UNSIGNED: 'unsigned',
    SIGNED: 'signed',
    SIGNED_NA: 'n/a'
  }.freeze

  # Human-readable names for each size constant.
  SIZE_DESCRIPTIONS = {
    SIZE_SHORT: 'short',
    SIZE_INT: 'int-width',
    SIZE_LONG: 'long',
    SIZE_LONG_LONG: 'long long',
    SIZE_8: '8-bit',
    SIZE_16: '16-bit',
    SIZE_32: '32-bit',
    SIZE_64: '64-bit',
    SIZE_P: 'pointer-width'
  }.freeze

  # Returns a String describing this directive.
  #
  # Raises RuntimeError when +type+ is not one of the known directive types.
  def describe
    case type
    when SPACE
      'whitespace'
    when COMMENT
      'comment'
    when INTEGER
      describe_integer
    when UTF8
      'UTF-8 character'
    when BER
      'BER-compressed integer'
    when FLOAT
      "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
    when STRING_SPACE_PADDED
      'arbitrary binary string (space padded)'
    when STRING_NULL_PADDED
      'arbitrary binary string (null padded, count is width)'
    when STRING_NULL_TERMINATED
      'arbitrary binary string (null padded, count is width), except that null is added with *'
    when STRING_MSB
      'bit string (MSB first)'
    when STRING_LSB
      'bit string (LSB first)'
    when STRING_HEX_HIGH
      'hex string (high nibble first)'
    when STRING_HEX_LOW
      'hex string (low nibble first)'
    when STRING_UU
      'UU-encoded string'
    when STRING_MIME
      'quoted printable, MIME encoding'
    when STRING_BASE64
      'base64 encoded string'
    when STRING_FIXED
      'pointer to a structure (fixed-length string)'
    when STRING_POINTER
      'pointer to a null-terminated string'
    when MOVE
      'move to absolute position'
    when BACK
      'back up a byte'
    when NULL
      'null byte'
    else
      # Still a RuntimeError, as with the previous bare `raise`, but now with
      # a message that identifies the offending type.
      raise "Unknown pack directive type: #{type.inspect}"
    end
  end

  private

  # Describes an INTEGER directive: signedness, size, endianness (omitted
  # for 8-bit values, where byte order is meaningless) and repeat count.
  def describe_integer
    base =
      if size == SIZE_8
        "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
      else
        "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
      end

    case length_type
    when LENGTH_FIXED
      length > 1 ? "#{base}, x#{length}" : base
    when LENGTH_MAX
      "#{base}, as many as possible"
    else
      # Any other length type used to fall through and yield nil; fall back
      # to the plain description so callers always receive a String.
      base
    end
  end
end
161
+
162
# A parsed pack template: the list of its directives together with the
# encoding associated with it.
class Format
  attr_reader :directives, :encoding

  def initialize(directives, encoding)
    @directives, @encoding = directives, encoding
  end

  # Returns a multi-line String listing every directive (its source text,
  # padded to a common width, followed by its description) and then the
  # encoding.
  def describe
    # The column width is based on the inspected form of each source, which
    # is also what is displayed for whitespace directives.
    width = directives.map { |directive| directive.source.inspect.length }.max

    lines = ['Directives:']
    directives.each do |directive|
      label = directive.type == SPACE ? directive.source.inspect : directive.source
      lines << " #{label.ljust(width)} #{directive.describe}"
    end
    lines << 'Encoding:'
    lines << " #{encoding}"

    lines.join("\n")
  end
end
184
+ end
185
+ end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ class ParseResult
5
# When we've parsed the source, we have both the syntax tree and the list of
# comments that we found in the source. This class is responsible for
# walking the tree and finding the nearest location to attach each comment.
#
# It does this by first finding the nearest locations to each comment.
# Locations can either come from nodes directly or from location fields on
# nodes. For example, a `ClassNode` has an overall location encompassing the
# entire class, but it also has a location for the `class` keyword.
#
# Once the nearest locations are found, it determines which one to attach
# to. If it's a trailing comment (a comment on the same line as other source
# code), it will favor attaching to the nearest location that occurs before
# the comment. Otherwise it will favor attaching to the nearest location
# that is after the comment.
class Comments
  # A target for attaching comments that is based on a specific node's
  # location.
  class NodeTarget
    attr_reader :node

    def initialize(node)
      @node = node
    end

    def start_offset
      node.location.start_offset
    end

    def end_offset
      node.location.end_offset
    end

    # True when the comment lies entirely within this node's range.
    def encloses?(comment)
      start_offset <= comment.location.start_offset &&
        comment.location.end_offset <= end_offset
    end

    # Attaches the comment to the node's location.
    def <<(comment)
      node.location.comments << comment
    end
  end

  # A target for attaching comments that is based on a location field on a
  # node. For example, the `end` token of a ClassNode.
  class LocationTarget
    attr_reader :location

    def initialize(location)
      @location = location
    end

    def start_offset
      location.start_offset
    end

    def end_offset
      location.end_offset
    end

    # A bare location has nothing to descend into, so it never encloses a
    # comment.
    def encloses?(comment)
      false
    end

    # Attaches the comment to the location.
    def <<(comment)
      location.comments << comment
    end
  end

  attr_reader :parse_result

  def initialize(parse_result)
    @parse_result = parse_result
  end

  # Attaches every comment of the parse result to the most suitable target
  # in the tree rooted at the parse result's value.
  def attach!
    parse_result.comments.each do |comment|
      preceding, enclosing, following = nearest_targets(parse_result.value, comment)

      # nearest_targets always supplies an enclosing target, so it serves as
      # the final fallback in both orderings.
      target =
        if comment.trailing?
          preceding || following || enclosing
        else
          # If a comment exists on its own line, prefer a leading comment.
          following || preceding || enclosing
        end

      target << comment
    end
  end

  private

  # Responsible for finding the nearest targets to the given comment within
  # the context of the given encapsulating node.
  #
  # Returns a three-element array: the closest target that ends before the
  # comment (or nil), a NodeTarget for the innermost node that encloses the
  # comment, and the closest target that starts after the comment (or nil).
  def nearest_targets(node, comment)
    comment_start = comment.location.start_offset
    comment_end = comment.location.end_offset

    targets = []
    node.comment_targets.each do |value|
      case value
      when StatementsNode
        # A statements list is transparent: its children are the targets.
        targets.concat(value.body.map { |statement| NodeTarget.new(statement) })
      when Node
        targets << NodeTarget.new(value)
      when Location
        targets << LocationTarget.new(value)
      end
    end

    targets.sort_by!(&:start_offset)
    preceding = nil
    following = nil

    left = 0
    right = targets.length

    # This is a custom binary search that finds the nearest nodes to the
    # given comment. When it finds a node that completely encapsulates the
    # comment, it recurses downward into the tree.
    while left < right
      middle = (left + right) / 2
      target = targets[middle]

      if target.encloses?(comment)
        # The comment is completely contained by this target. Abandon the
        # binary search at this level. Only NodeTarget can enclose a
        # comment, so target.node is available here.
        return nearest_targets(target.node, comment)
      end

      if target.end_offset <= comment_start
        # This target falls completely before the comment. Because we will
        # never consider this target or any targets before it again, this
        # target must be the closest preceding target we have encountered so
        # far.
        preceding = target
        left = middle + 1
      elsif comment_end <= target.start_offset
        # This target falls completely after the comment. Because we will
        # never consider this target or any targets after it again, this
        # target must be the closest following target we have encountered so
        # far.
        following = target
        right = middle
      else
        # This should only happen if there is a bug in this parser.
        raise "Comment location overlaps with a target location"
      end
    end

    [preceding, NodeTarget.new(node), following]
  end
end
164
+
165
+ private_constant :Comments
166
+
167
# Attach the list of comments to their respective locations in the tree by
# handing this parse result to a Comments instance.
def attach_comments!
  attacher = Comments.new(self)
  attacher.attach!
end
171
+ end
172
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ class ParseResult
5
# The :line tracepoint event gets fired whenever the Ruby VM encounters an
# expression on a new line. The types of expressions that can trigger this
# event are:
#
# * if statements
# * unless statements
# * nodes that are children of statements lists
#
# In order to keep track of the newlines, we have a list of offsets that
# come back from the parser. We assign these offsets to the first nodes that
# we find in the tree that are on those lines.
#
# Note that the logic in this file should be kept in sync with the Java
# MarkNewlinesVisitor, since that visitor is responsible for marking the
# newlines for JRuby/TruffleRuby.
class Newlines < Visitor
  # newline_marked - an array of booleans that is handed to
  # set_newline_flag on every candidate node.
  def initialize(newline_marked)
    @newline_marked = newline_marked
  end

  # Blocks get a fresh, all-false marker array of the same size while their
  # children are visited; the previous array is restored afterwards, even
  # when visiting raises.
  def visit_block_node(node)
    outer_marks = @newline_marked
    @newline_marked = Array.new(outer_marks.size, false)
    super(node)
  ensure
    @newline_marked = outer_marks
  end

  alias_method :visit_lambda_node, :visit_block_node

  # The conditional node itself is a candidate for the newline flag before
  # its children are visited.
  def visit_if_node(node)
    marks = @newline_marked
    node.set_newline_flag(marks)
    super(node)
  end

  alias_method :visit_unless_node, :visit_if_node

  # Every direct child of a statements list is a candidate for the newline
  # flag; the children are then visited as usual.
  def visit_statements_node(node)
    node.body.each { |statement| statement.set_newline_flag(@newline_marked) }
    super(node)
  end
end
52
+
53
+ private_constant :Newlines
54
+
55
# Walk the tree and mark nodes that are on a new line. The visitor is seeded
# with an all-false array holding one more slot than the source has offsets.
def mark_newlines!
  slot_count = 1 + source.offsets.size
  marks = Array.new(slot_count, false)
  value.accept(Newlines.new(marks))
end
59
+ end
60
+ end