debase-ruby_core_source 3.3.5 → 3.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/.idea/vcs.xml +28 -0
- data/CHANGELOG.md +8 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/addr2line.h +22 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/builtin.h +119 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ccan/build_assert/build_assert.h +40 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ccan/check_type/check_type.h +63 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ccan/container_of/container_of.h +142 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ccan/list/list.h +791 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ccan/str/str.h +17 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/constant.h +53 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/darray.h +209 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/debug_counter.h +423 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/dln.h +32 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/encindex.h +70 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/eval_intern.h +324 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/hrtime.h +237 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/id.h +347 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/id_table.h +39 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/insns.inc +265 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/insns_info.inc +9902 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/array.h +152 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/basic_operators.h +64 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/bignum.h +244 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/bits.h +568 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/class.h +283 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/cmdlineopt.h +65 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/compar.h +29 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/compile.h +34 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/compilers.h +107 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/complex.h +29 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/cont.h +35 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/dir.h +16 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/enc.h +19 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/encoding.h +36 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/enum.h +18 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/enumerator.h +21 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/error.h +218 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/eval.h +33 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/file.h +38 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/fixnum.h +184 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/gc.h +322 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/hash.h +192 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/imemo.h +261 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/inits.h +47 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/io.h +143 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/load.h +18 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/loadpath.h +16 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/math.h +23 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/missing.h +19 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/numeric.h +274 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/object.h +63 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/parse.h +129 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/proc.h +30 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/process.h +124 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/ractor.h +6 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/random.h +17 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/range.h +40 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/rational.h +71 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/re.h +28 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/ruby_parser.h +102 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/sanitizers.h +326 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/serial.h +23 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/signal.h +24 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/st.h +11 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/static_assert.h +16 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/string.h +186 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/struct.h +127 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/symbol.h +45 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/thread.h +85 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/time.h +34 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/transcode.h +23 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/util.h +27 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/variable.h +72 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/vm.h +137 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal/warnings.h +16 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/internal.h +108 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/iseq.h +340 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/known_errors.inc +1419 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/method.h +255 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/node.h +111 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/node_name.inc +224 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/optinsn.inc +128 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/optunifs.inc +43 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/parse.h +244 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/parser_bits.h +564 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/parser_node.h +32 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/parser_st.h +162 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/parser_value.h +106 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/ast.h +7524 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/defines.h +242 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/diagnostic.h +450 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/encoding.h +283 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/extension.h +19 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/node.h +129 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/options.h +396 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/pack.h +163 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/parser.h +933 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/prettyprint.h +34 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/prism.h +336 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/regexp.h +43 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/static_literals.h +121 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_buffer.h +218 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_char.h +204 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_constant_pool.h +218 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_integer.h +126 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_list.h +97 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_memchr.h +29 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_newline_list.h +113 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_string.h +190 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_strncasecmp.h +32 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/util/pm_strpbrk.h +46 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism/version.h +29 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/prism_compile.h +99 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/probes_helper.h +42 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ractor_core.h +382 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/regenc.h +254 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/regint.h +1006 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/regparse.h +371 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/revision.h +5 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/rjit.h +101 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/rjit_c.h +165 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ruby_assert.h +14 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/ruby_atomic.h +23 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/rubyparser.h +1350 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/shape.h +234 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/siphash.h +48 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/symbol.h +123 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/thread_none.h +21 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/thread_pthread.h +168 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/thread_win32.h +58 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/timev.h +58 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/transcode_data.h +138 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/variable.h +39 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/version.h +69 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm.inc +5840 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_call_iseq_optimized.inc +244 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_callinfo.h +627 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_core.h +2222 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_debug.h +124 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_exec.h +199 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_insnhelper.h +277 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_opts.h +67 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vm_sync.h +137 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/vmtc.inc +259 -0
- data/lib/debase/ruby_core_source/ruby-3.4.0-preview2/yjit.h +79 -0
- data/lib/debase/ruby_core_source/version.rb +1 -1
- metadata +150 -6
@@ -0,0 +1,933 @@
|
|
1
|
+
/**
|
2
|
+
* @file parser.h
|
3
|
+
*
|
4
|
+
* The parser used to parse Ruby source.
|
5
|
+
*/
|
6
|
+
#ifndef PRISM_PARSER_H
|
7
|
+
#define PRISM_PARSER_H
|
8
|
+
|
9
|
+
#include "prism/defines.h"
|
10
|
+
#include "prism/ast.h"
|
11
|
+
#include "prism/encoding.h"
|
12
|
+
#include "prism/options.h"
|
13
|
+
#include "prism/static_literals.h"
|
14
|
+
#include "prism/util/pm_constant_pool.h"
|
15
|
+
#include "prism/util/pm_list.h"
|
16
|
+
#include "prism/util/pm_newline_list.h"
|
17
|
+
#include "prism/util/pm_string.h"
|
18
|
+
|
19
|
+
#include <stdbool.h>
|
20
|
+
|
21
|
+
/**
|
22
|
+
* This enum provides various bits that represent different kinds of states that
|
23
|
+
* the lexer can track. This is used to determine which kind of token to return
|
24
|
+
* based on the context of the parser.
|
25
|
+
*/
|
26
|
+
typedef enum {
|
27
|
+
PM_LEX_STATE_BIT_BEG,
|
28
|
+
PM_LEX_STATE_BIT_END,
|
29
|
+
PM_LEX_STATE_BIT_ENDARG,
|
30
|
+
PM_LEX_STATE_BIT_ENDFN,
|
31
|
+
PM_LEX_STATE_BIT_ARG,
|
32
|
+
PM_LEX_STATE_BIT_CMDARG,
|
33
|
+
PM_LEX_STATE_BIT_MID,
|
34
|
+
PM_LEX_STATE_BIT_FNAME,
|
35
|
+
PM_LEX_STATE_BIT_DOT,
|
36
|
+
PM_LEX_STATE_BIT_CLASS,
|
37
|
+
PM_LEX_STATE_BIT_LABEL,
|
38
|
+
PM_LEX_STATE_BIT_LABELED,
|
39
|
+
PM_LEX_STATE_BIT_FITEM
|
40
|
+
} pm_lex_state_bit_t;
|
41
|
+
|
42
|
+
/**
|
43
|
+
* This enum combines the various bits from the above enum into individual
|
44
|
+
* values that represent the various states of the lexer.
|
45
|
+
*/
|
46
|
+
typedef enum {
|
47
|
+
PM_LEX_STATE_NONE = 0,
|
48
|
+
PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
|
49
|
+
PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
|
50
|
+
PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
|
51
|
+
PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
|
52
|
+
PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
|
53
|
+
PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
|
54
|
+
PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
|
55
|
+
PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
|
56
|
+
PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
|
57
|
+
PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
|
58
|
+
PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
|
59
|
+
PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
|
60
|
+
PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
|
61
|
+
PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
|
62
|
+
PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
|
63
|
+
PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
|
64
|
+
} pm_lex_state_t;
|
65
|
+
|
66
|
+
/**
|
67
|
+
* The type of quote that a heredoc uses.
|
68
|
+
*/
|
69
|
+
typedef enum {
|
70
|
+
PM_HEREDOC_QUOTE_NONE,
|
71
|
+
PM_HEREDOC_QUOTE_SINGLE = '\'',
|
72
|
+
PM_HEREDOC_QUOTE_DOUBLE = '"',
|
73
|
+
PM_HEREDOC_QUOTE_BACKTICK = '`',
|
74
|
+
} pm_heredoc_quote_t;
|
75
|
+
|
76
|
+
/**
|
77
|
+
* The type of indentation that a heredoc uses.
|
78
|
+
*/
|
79
|
+
typedef enum {
|
80
|
+
PM_HEREDOC_INDENT_NONE,
|
81
|
+
PM_HEREDOC_INDENT_DASH,
|
82
|
+
PM_HEREDOC_INDENT_TILDE,
|
83
|
+
} pm_heredoc_indent_t;
|
84
|
+
|
85
|
+
/**
|
86
|
+
* All of the information that needs to be stored in order to lex a heredoc.
|
87
|
+
*/
|
88
|
+
typedef struct {
|
89
|
+
/** A pointer to the start of the heredoc identifier. */
|
90
|
+
const uint8_t *ident_start;
|
91
|
+
|
92
|
+
/** The length of the heredoc identifier. */
|
93
|
+
size_t ident_length;
|
94
|
+
|
95
|
+
/** The type of quote that the heredoc uses. */
|
96
|
+
pm_heredoc_quote_t quote;
|
97
|
+
|
98
|
+
/** The type of indentation that the heredoc uses. */
|
99
|
+
pm_heredoc_indent_t indent;
|
100
|
+
} pm_heredoc_lex_mode_t;
|
101
|
+
|
102
|
+
/**
|
103
|
+
* When lexing Ruby source, the lexer has a small amount of state to tell which
|
104
|
+
* kind of token it is currently lexing. For example, when we find the start of
|
105
|
+
* a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
|
106
|
+
* that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
|
107
|
+
* are found as part of a string.
|
108
|
+
*/
|
109
|
+
typedef struct pm_lex_mode {
|
110
|
+
/** The type of this lex mode. */
|
111
|
+
enum {
|
112
|
+
/** This state is used when any given token is being lexed. */
|
113
|
+
PM_LEX_DEFAULT,
|
114
|
+
|
115
|
+
/**
|
116
|
+
* This state is used when we're lexing as normal but inside an embedded
|
117
|
+
* expression of a string.
|
118
|
+
*/
|
119
|
+
PM_LEX_EMBEXPR,
|
120
|
+
|
121
|
+
/**
|
122
|
+
* This state is used when we're lexing a variable that is embedded
|
123
|
+
* directly inside of a string with the # shorthand.
|
124
|
+
*/
|
125
|
+
PM_LEX_EMBVAR,
|
126
|
+
|
127
|
+
/** This state is used when you are inside the content of a heredoc. */
|
128
|
+
PM_LEX_HEREDOC,
|
129
|
+
|
130
|
+
/**
|
131
|
+
* This state is used when we are lexing a list of tokens, as in a %w
|
132
|
+
* word list literal or a %i symbol list literal.
|
133
|
+
*/
|
134
|
+
PM_LEX_LIST,
|
135
|
+
|
136
|
+
/**
|
137
|
+
* This state is used when a regular expression has been begun and we
|
138
|
+
* are looking for the terminator.
|
139
|
+
*/
|
140
|
+
PM_LEX_REGEXP,
|
141
|
+
|
142
|
+
/**
|
143
|
+
* This state is used when we are lexing a string or a string-like
|
144
|
+
* token, as in string content with either quote or an xstring.
|
145
|
+
*/
|
146
|
+
PM_LEX_STRING
|
147
|
+
} mode;
|
148
|
+
|
149
|
+
/** The data associated with this type of lex mode. */
|
150
|
+
union {
|
151
|
+
struct {
|
152
|
+
/** This keeps track of the nesting level of the list. */
|
153
|
+
size_t nesting;
|
154
|
+
|
155
|
+
/** Whether or not interpolation is allowed in this list. */
|
156
|
+
bool interpolation;
|
157
|
+
|
158
|
+
/**
|
159
|
+
* When lexing a list, it takes into account balancing the
|
160
|
+
* terminator if the terminator is one of (), [], {}, or <>.
|
161
|
+
*/
|
162
|
+
uint8_t incrementor;
|
163
|
+
|
164
|
+
/** This is the terminator of the list literal. */
|
165
|
+
uint8_t terminator;
|
166
|
+
|
167
|
+
/**
|
168
|
+
* This is the character set that should be used to delimit the
|
169
|
+
* tokens within the list.
|
170
|
+
*/
|
171
|
+
uint8_t breakpoints[11];
|
172
|
+
} list;
|
173
|
+
|
174
|
+
struct {
|
175
|
+
/**
|
176
|
+
* This keeps track of the nesting level of the regular expression.
|
177
|
+
*/
|
178
|
+
size_t nesting;
|
179
|
+
|
180
|
+
/**
|
181
|
+
* When lexing a regular expression, it takes into account balancing
|
182
|
+
* the terminator if the terminator is one of (), [], {}, or <>.
|
183
|
+
*/
|
184
|
+
uint8_t incrementor;
|
185
|
+
|
186
|
+
/** This is the terminator of the regular expression. */
|
187
|
+
uint8_t terminator;
|
188
|
+
|
189
|
+
/**
|
190
|
+
* This is the character set that should be used to delimit the
|
191
|
+
* tokens within the regular expression.
|
192
|
+
*/
|
193
|
+
uint8_t breakpoints[7];
|
194
|
+
} regexp;
|
195
|
+
|
196
|
+
struct {
|
197
|
+
/** This keeps track of the nesting level of the string. */
|
198
|
+
size_t nesting;
|
199
|
+
|
200
|
+
/** Whether or not interpolation is allowed in this string. */
|
201
|
+
bool interpolation;
|
202
|
+
|
203
|
+
/**
|
204
|
+
* Whether or not at the end of the string we should allow a :,
|
205
|
+
* which would indicate this was a dynamic symbol instead of a
|
206
|
+
* string.
|
207
|
+
*/
|
208
|
+
bool label_allowed;
|
209
|
+
|
210
|
+
/**
|
211
|
+
* When lexing a string, it takes into account balancing the
|
212
|
+
* terminator if the terminator is one of (), [], {}, or <>.
|
213
|
+
*/
|
214
|
+
uint8_t incrementor;
|
215
|
+
|
216
|
+
/**
|
217
|
+
* This is the terminator of the string. It is typically either a
|
218
|
+
* single or double quote.
|
219
|
+
*/
|
220
|
+
uint8_t terminator;
|
221
|
+
|
222
|
+
/**
|
223
|
+
* This is the character set that should be used to delimit the
|
224
|
+
* tokens within the string.
|
225
|
+
*/
|
226
|
+
uint8_t breakpoints[7];
|
227
|
+
} string;
|
228
|
+
|
229
|
+
struct {
|
230
|
+
/**
|
231
|
+
* All of the data necessary to lex a heredoc.
|
232
|
+
*/
|
233
|
+
pm_heredoc_lex_mode_t base;
|
234
|
+
|
235
|
+
/**
|
236
|
+
* This is the pointer to the character where lexing should resume
|
237
|
+
* once the heredoc has been completely processed.
|
238
|
+
*/
|
239
|
+
const uint8_t *next_start;
|
240
|
+
|
241
|
+
/**
|
242
|
+
* This is used to track the amount of common whitespace on each
|
243
|
+
* line so that we know how much to dedent each line in the case of
|
244
|
+
* a tilde heredoc.
|
245
|
+
*/
|
246
|
+
size_t *common_whitespace;
|
247
|
+
|
248
|
+
/** True if the previous token ended with a line continuation. */
|
249
|
+
bool line_continuation;
|
250
|
+
} heredoc;
|
251
|
+
} as;
|
252
|
+
|
253
|
+
/** The previous lex state so that it knows how to pop. */
|
254
|
+
struct pm_lex_mode *prev;
|
255
|
+
} pm_lex_mode_t;
|
256
|
+
|
257
|
+
/**
|
258
|
+
* We pre-allocate a certain number of lex states in order to avoid having to
|
259
|
+
* call malloc too many times while parsing. You really shouldn't need more than
|
260
|
+
* this because you only really nest deeply when doing string interpolation.
|
261
|
+
*/
|
262
|
+
#define PM_LEX_STACK_SIZE 4
|
263
|
+
|
264
|
+
/**
|
265
|
+
* The parser used to parse Ruby source.
|
266
|
+
*/
|
267
|
+
typedef struct pm_parser pm_parser_t;
|
268
|
+
|
269
|
+
/**
|
270
|
+
* While parsing, we keep track of a stack of contexts. This is helpful for
|
271
|
+
* error recovery so that we can pop back to a previous context when we hit a
|
272
|
+
* token that is understood by a parent context but not by the current context.
|
273
|
+
*/
|
274
|
+
typedef enum {
|
275
|
+
/** a null context, used for returning a value from a function */
|
276
|
+
PM_CONTEXT_NONE = 0,
|
277
|
+
|
278
|
+
/** a begin statement */
|
279
|
+
PM_CONTEXT_BEGIN,
|
280
|
+
|
281
|
+
/** an ensure statement with an explicit begin */
|
282
|
+
PM_CONTEXT_BEGIN_ENSURE,
|
283
|
+
|
284
|
+
/** a rescue else statement with an explicit begin */
|
285
|
+
PM_CONTEXT_BEGIN_ELSE,
|
286
|
+
|
287
|
+
/** a rescue statement with an explicit begin */
|
288
|
+
PM_CONTEXT_BEGIN_RESCUE,
|
289
|
+
|
290
|
+
/** expressions in block arguments using braces */
|
291
|
+
PM_CONTEXT_BLOCK_BRACES,
|
292
|
+
|
293
|
+
/** expressions in block arguments using do..end */
|
294
|
+
PM_CONTEXT_BLOCK_KEYWORDS,
|
295
|
+
|
296
|
+
/** an ensure statement within a do..end block */
|
297
|
+
PM_CONTEXT_BLOCK_ENSURE,
|
298
|
+
|
299
|
+
/** a rescue else statement within a do..end block */
|
300
|
+
PM_CONTEXT_BLOCK_ELSE,
|
301
|
+
|
302
|
+
/** a rescue statement within a do..end block */
|
303
|
+
PM_CONTEXT_BLOCK_RESCUE,
|
304
|
+
|
305
|
+
/** a case when statement */
|
306
|
+
PM_CONTEXT_CASE_WHEN,
|
307
|
+
|
308
|
+
/** a case in statement */
|
309
|
+
PM_CONTEXT_CASE_IN,
|
310
|
+
|
311
|
+
/** a class declaration */
|
312
|
+
PM_CONTEXT_CLASS,
|
313
|
+
|
314
|
+
/** an ensure statement within a class statement */
|
315
|
+
PM_CONTEXT_CLASS_ENSURE,
|
316
|
+
|
317
|
+
/** a rescue else statement within a class statement */
|
318
|
+
PM_CONTEXT_CLASS_ELSE,
|
319
|
+
|
320
|
+
/** a rescue statement within a class statement */
|
321
|
+
PM_CONTEXT_CLASS_RESCUE,
|
322
|
+
|
323
|
+
/** a method definition */
|
324
|
+
PM_CONTEXT_DEF,
|
325
|
+
|
326
|
+
/** an ensure statement within a method definition */
|
327
|
+
PM_CONTEXT_DEF_ENSURE,
|
328
|
+
|
329
|
+
/** a rescue else statement within a method definition */
|
330
|
+
PM_CONTEXT_DEF_ELSE,
|
331
|
+
|
332
|
+
/** a rescue statement within a method definition */
|
333
|
+
PM_CONTEXT_DEF_RESCUE,
|
334
|
+
|
335
|
+
/** a method definition's parameters */
|
336
|
+
PM_CONTEXT_DEF_PARAMS,
|
337
|
+
|
338
|
+
/** a defined? expression */
|
339
|
+
PM_CONTEXT_DEFINED,
|
340
|
+
|
341
|
+
/** a method definition's default parameter */
|
342
|
+
PM_CONTEXT_DEFAULT_PARAMS,
|
343
|
+
|
344
|
+
/** an else clause */
|
345
|
+
PM_CONTEXT_ELSE,
|
346
|
+
|
347
|
+
/** an elsif clause */
|
348
|
+
PM_CONTEXT_ELSIF,
|
349
|
+
|
350
|
+
/** an interpolated expression */
|
351
|
+
PM_CONTEXT_EMBEXPR,
|
352
|
+
|
353
|
+
/** a for loop */
|
354
|
+
PM_CONTEXT_FOR,
|
355
|
+
|
356
|
+
/** a for loop's index */
|
357
|
+
PM_CONTEXT_FOR_INDEX,
|
358
|
+
|
359
|
+
/** an if statement */
|
360
|
+
PM_CONTEXT_IF,
|
361
|
+
|
362
|
+
/** a lambda expression with braces */
|
363
|
+
PM_CONTEXT_LAMBDA_BRACES,
|
364
|
+
|
365
|
+
/** a lambda expression with do..end */
|
366
|
+
PM_CONTEXT_LAMBDA_DO_END,
|
367
|
+
|
368
|
+
/** an ensure statement within a lambda expression */
|
369
|
+
PM_CONTEXT_LAMBDA_ENSURE,
|
370
|
+
|
371
|
+
/** a rescue else statement within a lambda expression */
|
372
|
+
PM_CONTEXT_LAMBDA_ELSE,
|
373
|
+
|
374
|
+
/** a rescue statement within a lambda expression */
|
375
|
+
PM_CONTEXT_LAMBDA_RESCUE,
|
376
|
+
|
377
|
+
/** the predicate clause of a loop statement */
|
378
|
+
PM_CONTEXT_LOOP_PREDICATE,
|
379
|
+
|
380
|
+
/** the top level context */
|
381
|
+
PM_CONTEXT_MAIN,
|
382
|
+
|
383
|
+
/** a module declaration */
|
384
|
+
PM_CONTEXT_MODULE,
|
385
|
+
|
386
|
+
/** an ensure statement within a module statement */
|
387
|
+
PM_CONTEXT_MODULE_ENSURE,
|
388
|
+
|
389
|
+
/** a rescue else statement within a module statement */
|
390
|
+
PM_CONTEXT_MODULE_ELSE,
|
391
|
+
|
392
|
+
/** a rescue statement within a module statement */
|
393
|
+
PM_CONTEXT_MODULE_RESCUE,
|
394
|
+
|
395
|
+
/** a multiple target expression */
|
396
|
+
PM_CONTEXT_MULTI_TARGET,
|
397
|
+
|
398
|
+
/** a parenthesized expression */
|
399
|
+
PM_CONTEXT_PARENS,
|
400
|
+
|
401
|
+
/** an END block */
|
402
|
+
PM_CONTEXT_POSTEXE,
|
403
|
+
|
404
|
+
/** a predicate inside an if/elsif/unless statement */
|
405
|
+
PM_CONTEXT_PREDICATE,
|
406
|
+
|
407
|
+
/** a BEGIN block */
|
408
|
+
PM_CONTEXT_PREEXE,
|
409
|
+
|
410
|
+
/** a modifier rescue clause */
|
411
|
+
PM_CONTEXT_RESCUE_MODIFIER,
|
412
|
+
|
413
|
+
/** a singleton class definition */
|
414
|
+
PM_CONTEXT_SCLASS,
|
415
|
+
|
416
|
+
/** an ensure statement within a singleton class */
|
417
|
+
PM_CONTEXT_SCLASS_ENSURE,
|
418
|
+
|
419
|
+
/** a rescue else statement within a singleton class */
|
420
|
+
PM_CONTEXT_SCLASS_ELSE,
|
421
|
+
|
422
|
+
/** a rescue statement within a singleton class */
|
423
|
+
PM_CONTEXT_SCLASS_RESCUE,
|
424
|
+
|
425
|
+
/** a ternary expression */
|
426
|
+
PM_CONTEXT_TERNARY,
|
427
|
+
|
428
|
+
/** an unless statement */
|
429
|
+
PM_CONTEXT_UNLESS,
|
430
|
+
|
431
|
+
/** an until statement */
|
432
|
+
PM_CONTEXT_UNTIL,
|
433
|
+
|
434
|
+
/** a while statement */
|
435
|
+
PM_CONTEXT_WHILE,
|
436
|
+
} pm_context_t;
|
437
|
+
|
438
|
+
/** This is a node in a linked list of contexts. */
|
439
|
+
typedef struct pm_context_node {
|
440
|
+
/** The context that this node represents. */
|
441
|
+
pm_context_t context;
|
442
|
+
|
443
|
+
/** A pointer to the previous context in the linked list. */
|
444
|
+
struct pm_context_node *prev;
|
445
|
+
} pm_context_node_t;
|
446
|
+
|
447
|
+
/** This is the type of a comment that we've found while parsing. */
|
448
|
+
typedef enum {
|
449
|
+
PM_COMMENT_INLINE,
|
450
|
+
PM_COMMENT_EMBDOC
|
451
|
+
} pm_comment_type_t;
|
452
|
+
|
453
|
+
/**
|
454
|
+
* This is a node in the linked list of comments that we've found while parsing.
|
455
|
+
*
|
456
|
+
* @extends pm_list_node_t
|
457
|
+
*/
|
458
|
+
typedef struct pm_comment {
|
459
|
+
/** The embedded base node. */
|
460
|
+
pm_list_node_t node;
|
461
|
+
|
462
|
+
/** The location of the comment in the source. */
|
463
|
+
pm_location_t location;
|
464
|
+
|
465
|
+
/** The type of comment that we've found. */
|
466
|
+
pm_comment_type_t type;
|
467
|
+
} pm_comment_t;
|
468
|
+
|
469
|
+
/**
|
470
|
+
* This is a node in the linked list of magic comments that we've found while
|
471
|
+
* parsing.
|
472
|
+
*
|
473
|
+
* @extends pm_list_node_t
|
474
|
+
*/
|
475
|
+
typedef struct {
|
476
|
+
/** The embedded base node. */
|
477
|
+
pm_list_node_t node;
|
478
|
+
|
479
|
+
/** A pointer to the start of the key in the source. */
|
480
|
+
const uint8_t *key_start;
|
481
|
+
|
482
|
+
/** A pointer to the start of the value in the source. */
|
483
|
+
const uint8_t *value_start;
|
484
|
+
|
485
|
+
/** The length of the key in the source. */
|
486
|
+
uint32_t key_length;
|
487
|
+
|
488
|
+
/** The length of the value in the source. */
|
489
|
+
uint32_t value_length;
|
490
|
+
} pm_magic_comment_t;
|
491
|
+
|
492
|
+
/**
|
493
|
+
* When the encoding that is being used to parse the source is changed by prism,
|
494
|
+
* we provide the ability here to call out to a user-defined function.
|
495
|
+
*/
|
496
|
+
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
497
|
+
|
498
|
+
/**
|
499
|
+
* When you are lexing through a file, the lexer needs all of the information
|
500
|
+
* that the parser additionally provides (for example, the local table). So if
|
501
|
+
* you want to properly lex Ruby, you need to actually lex it in the context of
|
502
|
+
* the parser. In order to provide this functionality, we optionally allow a
|
503
|
+
* struct to be attached to the parser that calls back out to a user-provided
|
504
|
+
* callback when each token is lexed.
|
505
|
+
*/
|
506
|
+
typedef struct {
|
507
|
+
/**
|
508
|
+
* This opaque pointer is used to provide whatever information the user
|
509
|
+
* deemed necessary to the callback. In our case we use it to pass the array
|
510
|
+
* that the tokens get appended into.
|
511
|
+
*/
|
512
|
+
void *data;
|
513
|
+
|
514
|
+
/**
|
515
|
+
* This is the callback that is called when a token is lexed. It is passed
|
516
|
+
* the opaque data pointer, the parser, and the token that was lexed.
|
517
|
+
*/
|
518
|
+
void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
|
519
|
+
} pm_lex_callback_t;
|
520
|
+
|
521
|
+
/** The type of shareable constant value that can be set. */
|
522
|
+
typedef uint8_t pm_shareable_constant_value_t;
|
523
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
|
524
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
|
525
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
|
526
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
|
527
|
+
|
528
|
+
/**
|
529
|
+
* This tracks an individual local variable in a certain lexical context, as
|
530
|
+
* well as the number of times it is read.
|
531
|
+
*/
|
532
|
+
typedef struct {
|
533
|
+
/** The name of the local variable. */
|
534
|
+
pm_constant_id_t name;
|
535
|
+
|
536
|
+
/** The location of the local variable in the source. */
|
537
|
+
pm_location_t location;
|
538
|
+
|
539
|
+
/** The index of the local variable in the local table. */
|
540
|
+
uint32_t index;
|
541
|
+
|
542
|
+
/** The number of times the local variable is read. */
|
543
|
+
uint32_t reads;
|
544
|
+
|
545
|
+
/** The hash of the local variable. */
|
546
|
+
uint32_t hash;
|
547
|
+
} pm_local_t;
|
548
|
+
|
549
|
+
/**
|
550
|
+
* This is a set of local variables in a certain lexical context (method, class,
|
551
|
+
* module, etc.). We need to track how many times these variables are read in
|
552
|
+
* order to warn if they only get written.
|
553
|
+
*/
|
554
|
+
typedef struct pm_locals {
|
555
|
+
/** The number of local variables in the set. */
|
556
|
+
uint32_t size;
|
557
|
+
|
558
|
+
/** The capacity of the local variables set. */
|
559
|
+
uint32_t capacity;
|
560
|
+
|
561
|
+
/** The nullable allocated memory for the local variables in the set. */
|
562
|
+
pm_local_t *locals;
|
563
|
+
} pm_locals_t;
|
564
|
+
|
565
|
+
/** The flags about scope parameters that can be set. */
|
566
|
+
typedef uint8_t pm_scope_parameters_t;
|
567
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
568
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
|
569
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
|
570
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
|
571
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
|
572
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
|
573
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
|
574
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
|
575
|
+
|
576
|
+
/**
|
577
|
+
* This struct represents a node in a linked list of scopes. Some scopes can see
|
578
|
+
* into their parent scopes, while others cannot.
|
579
|
+
*/
|
580
|
+
typedef struct pm_scope {
|
581
|
+
/** A pointer to the previous scope in the linked list. */
|
582
|
+
struct pm_scope *previous;
|
583
|
+
|
584
|
+
/** The IDs of the locals in the given scope. */
|
585
|
+
pm_locals_t locals;
|
586
|
+
|
587
|
+
/**
|
588
|
+
* This is a list of the implicit parameters contained within the block.
|
589
|
+
* These will be processed after the block is parsed to determine the kind
|
590
|
+
* of parameters node that should be used and to check if any errors need to
|
591
|
+
* be added.
|
592
|
+
*/
|
593
|
+
pm_node_list_t implicit_parameters;
|
594
|
+
|
595
|
+
/**
|
596
|
+
* This is a bitfield that indicates the parameters that are being used in
|
597
|
+
* this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
|
598
|
+
* There are three different kinds of parameters that can be used in a
|
599
|
+
* scope:
|
600
|
+
*
|
601
|
+
* - Ordinary parameters (e.g., def foo(bar); end)
|
602
|
+
* - Numbered parameters (e.g., def foo; _1; end)
|
603
|
+
* - The it parameter (e.g., def foo; it; end)
|
604
|
+
*
|
605
|
+
* If ordinary parameters are being used, then certain parameters can be
|
606
|
+
* forwarded to another method/structure. Those are indicated by four
|
607
|
+
* additional bits in the parameters field. For example, some combinations of:
|
608
|
+
*
|
609
|
+
* - def foo(*); end
|
610
|
+
* - def foo(**); end
|
611
|
+
* - def foo(&); end
|
612
|
+
* - def foo(...); end
|
613
|
+
*/
|
614
|
+
pm_scope_parameters_t parameters;
|
615
|
+
|
616
|
+
/**
|
617
|
+
* The current state of constant shareability for this scope. This is
|
618
|
+
* changed by magic shareable_constant_value comments.
|
619
|
+
*/
|
620
|
+
pm_shareable_constant_value_t shareable_constant;
|
621
|
+
|
622
|
+
/**
|
623
|
+
* A boolean indicating whether or not this scope can see into its parent.
|
624
|
+
* If closed is true, then the scope cannot see into its parent.
|
625
|
+
*/
|
626
|
+
bool closed;
|
627
|
+
} pm_scope_t;
|
628
|
+
|
629
|
+
/**
|
630
|
+
* A type that represents a stack of boolean values, backed by a uint32_t.
|
631
|
+
*/
|
632
|
+
typedef uint32_t pm_state_stack_t;
|
633
|
+
|
634
|
+
/**
|
635
|
+
* This struct represents the overall parser. It contains a reference to the
|
636
|
+
* source file, as well as pointers that indicate where in the source it's
|
637
|
+
* currently parsing. It also contains the most recent and current token that
|
638
|
+
* it's considering.
|
639
|
+
*/
|
640
|
+
struct pm_parser {
|
641
|
+
/**
|
642
|
+
* The next node identifier that will be assigned. This is a unique
|
643
|
+
* identifier used to track nodes such that the syntax tree can be dropped
|
644
|
+
* but the node can be found through another parse.
|
645
|
+
*/
|
646
|
+
uint32_t node_id;
|
647
|
+
|
648
|
+
/** The current state of the lexer. */
|
649
|
+
pm_lex_state_t lex_state;
|
650
|
+
|
651
|
+
/** Tracks the current nesting of (), [], and {}. */
|
652
|
+
int enclosure_nesting;
|
653
|
+
|
654
|
+
/**
|
655
|
+
* Used to temporarily track the nesting of enclosures to determine if a {
|
656
|
+
* is the beginning of a lambda following the parameters of a lambda.
|
657
|
+
*/
|
658
|
+
int lambda_enclosure_nesting;
|
659
|
+
|
660
|
+
/**
|
661
|
+
* Used to track the nesting of braces to ensure we get the correct value
|
662
|
+
* when we are interpolating blocks with braces.
|
663
|
+
*/
|
664
|
+
int brace_nesting;
|
665
|
+
|
666
|
+
/**
|
667
|
+
* The stack used to determine if a do keyword belongs to the predicate of a
|
668
|
+
* while, until, or for loop.
|
669
|
+
*/
|
670
|
+
pm_state_stack_t do_loop_stack;
|
671
|
+
|
672
|
+
/**
|
673
|
+
* The stack used to determine if a do keyword belongs to the beginning of a
|
674
|
+
* block.
|
675
|
+
*/
|
676
|
+
pm_state_stack_t accepts_block_stack;
|
677
|
+
|
678
|
+
/** A stack of lex modes. */
|
679
|
+
struct {
|
680
|
+
/** The current mode of the lexer. */
|
681
|
+
pm_lex_mode_t *current;
|
682
|
+
|
683
|
+
/** The stack of lexer modes. */
|
684
|
+
pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
|
685
|
+
|
686
|
+
/** The current index into the lexer mode stack. */
|
687
|
+
size_t index;
|
688
|
+
} lex_modes;
|
689
|
+
|
690
|
+
/** The pointer to the start of the source. */
|
691
|
+
const uint8_t *start;
|
692
|
+
|
693
|
+
/** The pointer to the end of the source. */
|
694
|
+
const uint8_t *end;
|
695
|
+
|
696
|
+
/** The previous token we were considering. */
|
697
|
+
pm_token_t previous;
|
698
|
+
|
699
|
+
/** The current token we're considering. */
|
700
|
+
pm_token_t current;
|
701
|
+
|
702
|
+
/**
|
703
|
+
* This is a special field set on the parser when we need the parser to jump
|
704
|
+
* to a specific location when lexing the next token, as opposed to just
|
705
|
+
* using the end of the previous token. Normally this is NULL.
|
706
|
+
*/
|
707
|
+
const uint8_t *next_start;
|
708
|
+
|
709
|
+
/**
|
710
|
+
* This field indicates the end of a heredoc whose identifier was found on
|
711
|
+
* the current line. If another heredoc is found on the same line, then this
|
712
|
+
* will be moved forward to the end of that heredoc. If no heredocs are
|
713
|
+
* found on a line then this is NULL.
|
714
|
+
*/
|
715
|
+
const uint8_t *heredoc_end;
|
716
|
+
|
717
|
+
/** The list of comments that have been found while parsing. */
|
718
|
+
pm_list_t comment_list;
|
719
|
+
|
720
|
+
/** The list of magic comments that have been found while parsing. */
|
721
|
+
pm_list_t magic_comment_list;
|
722
|
+
|
723
|
+
/**
|
724
|
+
* An optional location that represents the location of the __END__ marker
|
725
|
+
* and the rest of the content of the file. This content is loaded into the
|
726
|
+
* DATA constant when the file being parsed is the main file being executed.
|
727
|
+
*/
|
728
|
+
pm_location_t data_loc;
|
729
|
+
|
730
|
+
/** The list of warnings that have been found while parsing. */
|
731
|
+
pm_list_t warning_list;
|
732
|
+
|
733
|
+
/** The list of errors that have been found while parsing. */
|
734
|
+
pm_list_t error_list;
|
735
|
+
|
736
|
+
/** The current local scope. */
|
737
|
+
pm_scope_t *current_scope;
|
738
|
+
|
739
|
+
/** The current parsing context. */
|
740
|
+
pm_context_node_t *current_context;
|
741
|
+
|
742
|
+
/**
|
743
|
+
* The hash keys for the hash that is currently being parsed. This is not
|
744
|
+
* usually necessary because the keys can be passed down the various call chains,
|
745
|
+
* but in the event that you're parsing a hash that is being directly
|
746
|
+
* pushed into another hash with **, we need to share the hash keys so that
|
747
|
+
* we can warn for the nested hash as well.
|
748
|
+
*/
|
749
|
+
pm_static_literals_t *current_hash_keys;
|
750
|
+
|
751
|
+
/**
|
752
|
+
* The encoding functions for the current file are attached to the parser as
|
753
|
+
* it's parsing so that it can change with a magic comment.
|
754
|
+
*/
|
755
|
+
const pm_encoding_t *encoding;
|
756
|
+
|
757
|
+
/**
|
758
|
+
* When the encoding that is being used to parse the source is changed by
|
759
|
+
* prism, we provide the ability here to call out to a user-defined
|
760
|
+
* function.
|
761
|
+
*/
|
762
|
+
pm_encoding_changed_callback_t encoding_changed_callback;
|
763
|
+
|
764
|
+
/**
|
765
|
+
* This pointer indicates where a comment must start if it is to be
|
766
|
+
* considered an encoding comment.
|
767
|
+
*/
|
768
|
+
const uint8_t *encoding_comment_start;
|
769
|
+
|
770
|
+
/**
|
771
|
+
* This is an optional callback that can be attached to the parser that will
|
772
|
+
* be called whenever a new token is lexed by the parser.
|
773
|
+
*/
|
774
|
+
pm_lex_callback_t *lex_callback;
|
775
|
+
|
776
|
+
/**
|
777
|
+
* This is the path of the file being parsed. We use the filepath when
|
778
|
+
* constructing SourceFileNodes.
|
779
|
+
*/
|
780
|
+
pm_string_t filepath;
|
781
|
+
|
782
|
+
/**
|
783
|
+
* This constant pool keeps all of the constants defined throughout the file
|
784
|
+
* so that we can reference them later.
|
785
|
+
*/
|
786
|
+
pm_constant_pool_t constant_pool;
|
787
|
+
|
788
|
+
/** This is the list of newline offsets in the source file. */
|
789
|
+
pm_newline_list_t newline_list;
|
790
|
+
|
791
|
+
/**
|
792
|
+
* We want to add a flag to integer nodes that indicates their base. We only
|
793
|
+
* want to parse these once, but we don't have space on the token itself to
|
794
|
+
* communicate this information. So we store it here and pass it through
|
795
|
+
* when we find tokens that we need it for.
|
796
|
+
*/
|
797
|
+
pm_node_flags_t integer_base;
|
798
|
+
|
799
|
+
/**
|
800
|
+
* This string is used to pass information from the lexer to the parser. It
|
801
|
+
* is particularly necessary because of escape sequences.
|
802
|
+
*/
|
803
|
+
pm_string_t current_string;
|
804
|
+
|
805
|
+
/**
|
806
|
+
* The line number at the start of the parse. This will be used to offset
|
807
|
+
* the line numbers of all of the locations.
|
808
|
+
*/
|
809
|
+
int32_t start_line;
|
810
|
+
|
811
|
+
/**
|
812
|
+
* When a string-like expression is being lexed, any byte or escape sequence
|
813
|
+
* that resolves to a value whose top bit is set (i.e., >= 0x80) will
|
814
|
+
* explicitly set the encoding to the same encoding as the source.
|
815
|
+
* Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
|
816
|
+
* resolves to a value whose top bit is set, then the encoding will be
|
817
|
+
* explicitly set to UTF-8.
|
818
|
+
*
|
819
|
+
* The _next_ time this happens, if the encoding that is about to become the
|
820
|
+
* explicitly set encoding does not match the previously set explicit
|
821
|
+
* encoding, a mixed encoding error will be emitted.
|
822
|
+
*
|
823
|
+
* When the expression is finished being lexed, the explicit encoding
|
824
|
+
* controls the encoding of the expression. For the most part this means
|
825
|
+
* that the expression will either be encoded in the source encoding or
|
826
|
+
* UTF-8. This holds for all encodings except US-ASCII. If the source is
|
827
|
+
* US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
|
828
|
+
* expression will be encoded as ASCII-8BIT.
|
829
|
+
*
|
830
|
+
* Note that if the expression is a list, different elements within the same
|
831
|
+
* list can have different encodings, so this will get reset between each
|
832
|
+
* element. Furthermore all of this only applies to lists that support
|
833
|
+
* interpolation, because otherwise escapes that could change the encoding
|
834
|
+
* are ignored.
|
835
|
+
*
|
836
|
+
* At first glance, it may make more sense for this to live on the lexer
|
837
|
+
* mode, but we need it here to communicate back to the parser for character
|
838
|
+
* literals that do not push a new lexer mode.
|
839
|
+
*/
|
840
|
+
const pm_encoding_t *explicit_encoding;
|
841
|
+
|
842
|
+
/**
|
843
|
+
* When parsing block exits (e.g., break, next, redo), we need to validate
|
844
|
+
* that they are in correct contexts. For the most part we can do this by
|
845
|
+
* looking at our parent contexts. However, modifier while and until
|
846
|
+
* expressions can change that context to make block exits valid. In these
|
847
|
+
* cases, we need to keep track of the block exits and then validate them
|
848
|
+
* after the expression has been parsed.
|
849
|
+
*
|
850
|
+
* We use a pointer here because we don't want to keep a whole list attached
|
851
|
+
* since this will only be used in the context of begin/end expressions.
|
852
|
+
*/
|
853
|
+
pm_node_list_t *current_block_exits;
|
854
|
+
|
855
|
+
/** The version of prism that we should use to parse. */
|
856
|
+
pm_options_version_t version;
|
857
|
+
|
858
|
+
/** The command line flags given from the options. */
|
859
|
+
uint8_t command_line;
|
860
|
+
|
861
|
+
/**
|
862
|
+
* Whether or not we have found a frozen_string_literal magic comment with
|
863
|
+
* a true or false value.
|
864
|
+
* May be:
|
865
|
+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
|
866
|
+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
|
867
|
+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
868
|
+
*/
|
869
|
+
int8_t frozen_string_literal;
|
870
|
+
|
871
|
+
/**
|
872
|
+
* Whether or not we are parsing an eval string. This impacts whether or not
|
873
|
+
* we should evaluate if block exits/yields are valid.
|
874
|
+
*/
|
875
|
+
bool parsing_eval;
|
876
|
+
|
877
|
+
/**
|
878
|
+
* Whether or not we are parsing a "partial" script, which is a script that
|
879
|
+
* will be evaluated in the context of another script, so we should not
|
880
|
+
* check jumps (next/break/etc.) for validity.
|
881
|
+
*/
|
882
|
+
bool partial_script;
|
883
|
+
|
884
|
+
/** Whether or not we're at the beginning of a command. */
|
885
|
+
bool command_start;
|
886
|
+
|
887
|
+
/** Whether or not we're currently recovering from a syntax error. */
|
888
|
+
bool recovering;
|
889
|
+
|
890
|
+
/**
|
891
|
+
* This is very specialized behavior for when you want to parse in a context
|
892
|
+
* that does not respect encoding comments. Its main use case is translating
|
893
|
+
* into the whitequark/parser AST which re-encodes source files in UTF-8
|
894
|
+
* before they are parsed and ignores encoding comments.
|
895
|
+
*/
|
896
|
+
bool encoding_locked;
|
897
|
+
|
898
|
+
/**
|
899
|
+
* Whether or not the encoding has been changed by a magic comment. We use
|
900
|
+
* this to provide a fast path for the lexer instead of going through the
|
901
|
+
* function pointer.
|
902
|
+
*/
|
903
|
+
bool encoding_changed;
|
904
|
+
|
905
|
+
/**
|
906
|
+
* This flag indicates that we are currently parsing a pattern matching
|
907
|
+
* expression and impacts the calculation of newlines.
|
908
|
+
*/
|
909
|
+
bool pattern_matching_newlines;
|
910
|
+
|
911
|
+
/** This flag indicates that we are currently parsing a keyword argument. */
|
912
|
+
bool in_keyword_arg;
|
913
|
+
|
914
|
+
/**
|
915
|
+
* Whether or not the parser has seen a token that has semantic meaning
|
916
|
+
* (i.e., a token that is not a comment or whitespace).
|
917
|
+
*/
|
918
|
+
bool semantic_token_seen;
|
919
|
+
|
920
|
+
/**
|
921
|
+
* True if the current regular expression being lexed contains only ASCII
|
922
|
+
* characters.
|
923
|
+
*/
|
924
|
+
bool current_regular_expression_ascii_only;
|
925
|
+
|
926
|
+
/**
|
927
|
+
* By default, Ruby always warns about mismatched indentation. This can be
|
928
|
+
* toggled with a magic comment.
|
929
|
+
*/
|
930
|
+
bool warn_mismatched_indentation;
|
931
|
+
};
|
932
|
+
|
933
|
+
#endif
|