prism 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +172 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +84 -0
- data/README.md +89 -0
- data/config.yml +2481 -0
- data/docs/build_system.md +74 -0
- data/docs/building.md +22 -0
- data/docs/configuration.md +60 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +117 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +25 -0
- data/docs/serialization.md +181 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +4725 -0
- data/ext/prism/api_pack.c +256 -0
- data/ext/prism/extconf.rb +136 -0
- data/ext/prism/extension.c +626 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/prism/enc/pm_encoding.h +95 -0
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/prism/parser.h +418 -0
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/prism/util/pm_char.h +91 -0
- data/include/prism/util/pm_constant_pool.h +78 -0
- data/include/prism/util/pm_list.h +67 -0
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/prism/util/pm_newline_list.h +61 -0
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/prism/util/pm_string.h +61 -0
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/prism/util/pm_strpbrk.h +29 -0
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/prism/desugar_compiler.rb +206 -0
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/prism/ffi.rb +251 -0
- data/lib/prism/lex_compat.rb +838 -0
- data/lib/prism/mutation_compiler.rb +718 -0
- data/lib/prism/node.rb +14540 -0
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +185 -0
- data/lib/prism/parse_result/comments.rb +172 -0
- data/lib/prism/parse_result/newlines.rb +60 -0
- data/lib/prism/parse_result.rb +266 -0
- data/lib/prism/pattern.rb +239 -0
- data/lib/prism/ripper_compat.rb +174 -0
- data/lib/prism/serialize.rb +662 -0
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/prism.gemspec +113 -0
- data/src/diagnostic.c +287 -0
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/pm_gbk.c +61 -0
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/pm_tables.c +507 -0
- data/src/enc/pm_unicode.c +2324 -0
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +2633 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +2136 -0
- data/src/prism.c +14587 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1899 -0
- data/src/token_type.c +349 -0
- data/src/unescape.c +637 -0
- data/src/util/pm_buffer.c +103 -0
- data/src/util/pm_char.c +272 -0
- data/src/util/pm_constant_pool.c +252 -0
- data/src/util/pm_list.c +41 -0
- data/src/util/pm_memchr.c +33 -0
- data/src/util/pm_newline_list.c +134 -0
- data/src/util/pm_state_stack.c +19 -0
- data/src/util/pm_string.c +200 -0
- data/src/util/pm_string_list.c +29 -0
- data/src/util/pm_strncasecmp.c +17 -0
- data/src/util/pm_strpbrk.c +66 -0
- metadata +138 -0
data/src/diagnostic.c
ADDED
@@ -0,0 +1,287 @@
|
|
1
|
+
#include "prism/diagnostic.h"
|
2
|
+
|
3
|
+
/*
|
4
|
+
## Message composition
|
5
|
+
|
6
|
+
When composing an error message, use sentence fragments.
|
7
|
+
|
8
|
+
Try describing the property of the code that caused the error, rather than the rule that is being
|
9
|
+
violated. It may help to use a fragment that completes a sentence beginning, "The parser
|
10
|
+
encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
|
11
|
+
context) after a semicolon.
|
12
|
+
|
13
|
+
For example, instead of "Control escape sequence cannot be doubled", prefer:
|
14
|
+
|
15
|
+
> "Invalid control escape sequence; control cannot be repeated"
|
16
|
+
|
17
|
+
In some cases, where the failure is more general or syntax expectations are violated, it may make
|
18
|
+
more sense to use a fragment that completes a sentence beginning, "The parser ...".
|
19
|
+
|
20
|
+
For example:
|
21
|
+
|
22
|
+
> "Expected an expression after `(`"
|
23
|
+
> "Cannot parse the expression"
|
24
|
+
|
25
|
+
|
26
|
+
## Message style guide
|
27
|
+
|
28
|
+
- Use articles like "a", "an", and "the" when appropriate.
|
29
|
+
- e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
|
30
|
+
- Use the common name for tokens and nodes.
|
31
|
+
- e.g., prefer "keyword splat" to "assoc splat"
|
32
|
+
- e.g., prefer "embedded document" to "embdoc"
|
33
|
+
- Capitalize the initial word of the message.
|
34
|
+
- Use back ticks around token literals
|
35
|
+
- e.g., "Expected a `=>` between the hash key and value"
|
36
|
+
- Do not use `.` or other punctuation at the end of the message.
|
37
|
+
- Do not use contractions like "can't". Prefer "cannot" to "can not".
|
38
|
+
- For tokens that can have multiple meanings, reference the token and its meaning.
|
39
|
+
- e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
|
40
|
+
|
41
|
+
|
42
|
+
## Error names (PM_ERR_*)
|
43
|
+
|
44
|
+
- When appropriate, prefer node name to token name.
|
45
|
+
- e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
|
46
|
+
- Prefer token name to common name.
|
47
|
+
- e.g., prefer "STAR" to "ASTERISK".
|
48
|
+
- Try to order the words in the name from more general to more specific,
|
49
|
+
- e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
|
50
|
+
- When in doubt, look for similar patterns and name them so that they are grouped when lexically
|
51
|
+
sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
|
52
|
+
*/
|
53
|
+
|
54
|
+
static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
55
|
+
[PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
|
56
|
+
[PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
|
57
|
+
[PM_ERR_ARGUMENT_AFTER_BLOCK] = "Unexpected argument after a block argument",
|
58
|
+
[PM_ERR_ARGUMENT_BARE_HASH] = "Unexpected bare hash argument",
|
59
|
+
[PM_ERR_ARGUMENT_BLOCK_MULTI] = "Multiple block arguments; only one block is allowed",
|
60
|
+
[PM_ERR_ARGUMENT_FORMAL_CLASS] = "Invalid formal argument; formal argument cannot be a class variable",
|
61
|
+
[PM_ERR_ARGUMENT_FORMAL_CONSTANT] = "Invalid formal argument; formal argument cannot be a constant",
|
62
|
+
[PM_ERR_ARGUMENT_FORMAL_GLOBAL] = "Invalid formal argument; formal argument cannot be a global variable",
|
63
|
+
[PM_ERR_ARGUMENT_FORMAL_IVAR] = "Invalid formal argument; formal argument cannot be an instance variable",
|
64
|
+
[PM_ERR_ARGUMENT_NO_FORWARDING_AMP] = "Unexpected `&` when the parent method is not forwarding",
|
65
|
+
[PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = "Unexpected `...` when the parent method is not forwarding",
|
66
|
+
[PM_ERR_ARGUMENT_NO_FORWARDING_STAR] = "Unexpected `*` when the parent method is not forwarding",
|
67
|
+
[PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = "Unexpected `*` splat argument after a `**` keyword splat argument",
|
68
|
+
[PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT] = "Unexpected `*` splat argument after a `*` splat argument",
|
69
|
+
[PM_ERR_ARGUMENT_TERM_PAREN] = "Expected a `)` to close the arguments",
|
70
|
+
[PM_ERR_ARGUMENT_UNEXPECTED_BLOCK] = "Unexpected `{` after a method call without parenthesis",
|
71
|
+
[PM_ERR_ARRAY_ELEMENT] = "Expected an element for the array",
|
72
|
+
[PM_ERR_ARRAY_EXPRESSION] = "Expected an expression for the array element",
|
73
|
+
[PM_ERR_ARRAY_EXPRESSION_AFTER_STAR] = "Expected an expression after `*` in the array",
|
74
|
+
[PM_ERR_ARRAY_SEPARATOR] = "Expected a `,` separator for the array elements",
|
75
|
+
[PM_ERR_ARRAY_TERM] = "Expected a `]` to close the array",
|
76
|
+
[PM_ERR_BEGIN_LONELY_ELSE] = "Unexpected `else` in `begin` block; a `rescue` clause must precede `else`",
|
77
|
+
[PM_ERR_BEGIN_TERM] = "Expected an `end` to close the `begin` statement",
|
78
|
+
[PM_ERR_BEGIN_UPCASE_BRACE] = "Expected a `{` after `BEGIN`",
|
79
|
+
[PM_ERR_BEGIN_UPCASE_TERM] = "Expected a `}` to close the `BEGIN` statement",
|
80
|
+
[PM_ERR_BEGIN_UPCASE_TOPLEVEL] = "BEGIN is permitted only at toplevel",
|
81
|
+
[PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE] = "Expected a local variable name in the block parameters",
|
82
|
+
[PM_ERR_BLOCK_PARAM_PIPE_TERM] = "Expected the block parameters to end with `|`",
|
83
|
+
[PM_ERR_BLOCK_TERM_BRACE] = "Expected a block beginning with `{` to end with `}`",
|
84
|
+
[PM_ERR_BLOCK_TERM_END] = "Expected a block beginning with `do` to end with `end`",
|
85
|
+
[PM_ERR_CANNOT_PARSE_EXPRESSION] = "Cannot parse the expression",
|
86
|
+
[PM_ERR_CANNOT_PARSE_STRING_PART] = "Cannot parse the string part",
|
87
|
+
[PM_ERR_CASE_EXPRESSION_AFTER_CASE] = "Expected an expression after `case`",
|
88
|
+
[PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = "Expected an expression after `when`",
|
89
|
+
[PM_ERR_CASE_MISSING_CONDITIONS] = "Expected a `when` or `in` clause after `case`",
|
90
|
+
[PM_ERR_CASE_TERM] = "Expected an `end` to close the `case` statement",
|
91
|
+
[PM_ERR_CLASS_IN_METHOD] = "Unexpected class definition in a method body",
|
92
|
+
[PM_ERR_CLASS_NAME] = "Expected a constant name after `class`",
|
93
|
+
[PM_ERR_CLASS_SUPERCLASS] = "Expected a superclass after `<`",
|
94
|
+
[PM_ERR_CLASS_TERM] = "Expected an `end` to close the `class` statement",
|
95
|
+
[PM_ERR_CLASS_UNEXPECTED_END] = "Unexpected `end`, expecting ';' or '\n'",
|
96
|
+
[PM_ERR_CONDITIONAL_ELSIF_PREDICATE] = "Expected a predicate expression for the `elsif` statement",
|
97
|
+
[PM_ERR_CONDITIONAL_IF_PREDICATE] = "Expected a predicate expression for the `if` statement",
|
98
|
+
[PM_ERR_CONDITIONAL_PREDICATE_TERM] = "Expected `then` or `;` or '\n'",
|
99
|
+
[PM_ERR_CONDITIONAL_TERM] = "Expected an `end` to close the conditional clause",
|
100
|
+
[PM_ERR_CONDITIONAL_TERM_ELSE] = "Expected an `end` to close the `else` clause",
|
101
|
+
[PM_ERR_CONDITIONAL_UNLESS_PREDICATE] = "Expected a predicate expression for the `unless` statement",
|
102
|
+
[PM_ERR_CONDITIONAL_UNTIL_PREDICATE] = "Expected a predicate expression for the `until` statement",
|
103
|
+
[PM_ERR_CONDITIONAL_WHILE_PREDICATE] = "Expected a predicate expression for the `while` statement",
|
104
|
+
[PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = "Expected a constant after the `::` operator",
|
105
|
+
[PM_ERR_DEF_ENDLESS] = "Could not parse the endless method body",
|
106
|
+
[PM_ERR_DEF_ENDLESS_SETTER] = "Invalid method name; a setter method cannot be defined in an endless method definition",
|
107
|
+
[PM_ERR_DEF_NAME] = "Expected a method name",
|
108
|
+
[PM_ERR_DEF_NAME_AFTER_RECEIVER] = "Expected a method name after the receiver",
|
109
|
+
[PM_ERR_DEF_PARAMS_TERM] = "Expected a delimiter to close the parameters",
|
110
|
+
[PM_ERR_DEF_PARAMS_TERM_PAREN] = "Expected a `)` to close the parameters",
|
111
|
+
[PM_ERR_DEF_RECEIVER] = "Expected a receiver for the method definition",
|
112
|
+
[PM_ERR_DEF_RECEIVER_TERM] = "Expected a `.` or `::` after the receiver in a method definition",
|
113
|
+
[PM_ERR_DEF_TERM] = "Expected an `end` to close the `def` statement",
|
114
|
+
[PM_ERR_DEFINED_EXPRESSION] = "Expected an expression after `defined?`",
|
115
|
+
[PM_ERR_EMBDOC_TERM] = "Could not find a terminator for the embedded document",
|
116
|
+
[PM_ERR_EMBEXPR_END] = "Expected a `}` to close the embedded expression",
|
117
|
+
[PM_ERR_EMBVAR_INVALID] = "Invalid embedded variable",
|
118
|
+
[PM_ERR_END_UPCASE_BRACE] = "Expected a `{` after `END`",
|
119
|
+
[PM_ERR_END_UPCASE_TERM] = "Expected a `}` to close the `END` statement",
|
120
|
+
[PM_ERR_ESCAPE_INVALID_CONTROL] = "Invalid control escape sequence",
|
121
|
+
[PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT] = "Invalid control escape sequence; control cannot be repeated",
|
122
|
+
[PM_ERR_ESCAPE_INVALID_HEXADECIMAL] = "Invalid hexadecimal escape sequence",
|
123
|
+
[PM_ERR_ESCAPE_INVALID_META] = "Invalid meta escape sequence",
|
124
|
+
[PM_ERR_ESCAPE_INVALID_META_REPEAT] = "Invalid meta escape sequence; meta cannot be repeated",
|
125
|
+
[PM_ERR_ESCAPE_INVALID_UNICODE] = "Invalid Unicode escape sequence",
|
126
|
+
[PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = "Invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags",
|
127
|
+
[PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = "Invalid Unicode escape sequence; multiple codepoints are not allowed in a character literal",
|
128
|
+
[PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = "Invalid Unicode escape sequence; maximum length is 6 digits",
|
129
|
+
[PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = "Invalid Unicode escape sequence; needs closing `}`",
|
130
|
+
[PM_ERR_EXPECT_ARGUMENT] = "Expected an argument",
|
131
|
+
[PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = "Expected a newline or semicolon after the statement",
|
132
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = "Expected an expression after `&&=`",
|
133
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = "Expected an expression after `||=`",
|
134
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = "Expected an expression after `,`",
|
135
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL] = "Expected an expression after `=`",
|
136
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS] = "Expected an expression after `<<`",
|
137
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN] = "Expected an expression after `(`",
|
138
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR] = "Expected an expression after the operator",
|
139
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT] = "Expected an expression after `*` splat in an argument",
|
140
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = "Expected an expression after `**` in a hash",
|
141
|
+
[PM_ERR_EXPECT_EXPRESSION_AFTER_STAR] = "Expected an expression after `*`",
|
142
|
+
[PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = "Expected an identifier for the required parameter",
|
143
|
+
[PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = "Expected a `(` to start a required parameter",
|
144
|
+
[PM_ERR_EXPECT_RBRACKET] = "Expected a matching `]`",
|
145
|
+
[PM_ERR_EXPECT_RPAREN] = "Expected a matching `)`",
|
146
|
+
[PM_ERR_EXPECT_RPAREN_AFTER_MULTI] = "Expected a `)` after multiple assignment",
|
147
|
+
[PM_ERR_EXPECT_RPAREN_REQ_PARAMETER] = "Expected a `)` to end a required parameter",
|
148
|
+
[PM_ERR_EXPECT_STRING_CONTENT] = "Expected string content after opening string delimiter",
|
149
|
+
[PM_ERR_EXPECT_WHEN_DELIMITER] = "Expected a delimiter after the predicates of a `when` clause",
|
150
|
+
[PM_ERR_EXPRESSION_BARE_HASH] = "Unexpected bare hash in expression",
|
151
|
+
[PM_ERR_FOR_COLLECTION] = "Expected a collection after the `in` in a `for` statement",
|
152
|
+
[PM_ERR_FOR_INDEX] = "Expected an index after `for`",
|
153
|
+
[PM_ERR_FOR_IN] = "Expected an `in` after the index in a `for` statement",
|
154
|
+
[PM_ERR_FOR_TERM] = "Expected an `end` to close the `for` loop",
|
155
|
+
[PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = "Expected an expression after the label in a hash",
|
156
|
+
[PM_ERR_HASH_KEY] = "Expected a key in the hash literal",
|
157
|
+
[PM_ERR_HASH_ROCKET] = "Expected a `=>` between the hash key and value",
|
158
|
+
[PM_ERR_HASH_TERM] = "Expected a `}` to close the hash literal",
|
159
|
+
[PM_ERR_HASH_VALUE] = "Expected a value in the hash literal",
|
160
|
+
[PM_ERR_HEREDOC_TERM] = "Could not find a terminator for the heredoc",
|
161
|
+
[PM_ERR_INCOMPLETE_QUESTION_MARK] = "Incomplete expression at `?`",
|
162
|
+
[PM_ERR_INCOMPLETE_VARIABLE_CLASS] = "Incomplete class variable",
|
163
|
+
[PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = "Incomplete instance variable",
|
164
|
+
[PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = "Unknown or invalid encoding in the magic comment",
|
165
|
+
[PM_ERR_INVALID_FLOAT_EXPONENT] = "Invalid exponent",
|
166
|
+
[PM_ERR_INVALID_NUMBER_BINARY] = "Invalid binary number",
|
167
|
+
[PM_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number",
|
168
|
+
[PM_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number",
|
169
|
+
[PM_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number",
|
170
|
+
[PM_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number",
|
171
|
+
[PM_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT?
|
172
|
+
[PM_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT?
|
173
|
+
[PM_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable",
|
174
|
+
[PM_ERR_LAMBDA_OPEN] = "Expected a `do` keyword or a `{` to open the lambda block",
|
175
|
+
[PM_ERR_LAMBDA_TERM_BRACE] = "Expected a lambda block beginning with `{` to end with `}`",
|
176
|
+
[PM_ERR_LAMBDA_TERM_END] = "Expected a lambda block beginning with `do` to end with `end`",
|
177
|
+
[PM_ERR_LIST_I_LOWER_ELEMENT] = "Expected a symbol in a `%i` list",
|
178
|
+
[PM_ERR_LIST_I_LOWER_TERM] = "Expected a closing delimiter for the `%i` list",
|
179
|
+
[PM_ERR_LIST_I_UPPER_ELEMENT] = "Expected a symbol in a `%I` list",
|
180
|
+
[PM_ERR_LIST_I_UPPER_TERM] = "Expected a closing delimiter for the `%I` list",
|
181
|
+
[PM_ERR_LIST_W_LOWER_ELEMENT] = "Expected a string in a `%w` list",
|
182
|
+
[PM_ERR_LIST_W_LOWER_TERM] = "Expected a closing delimiter for the `%w` list",
|
183
|
+
[PM_ERR_LIST_W_UPPER_ELEMENT] = "Expected a string in a `%W` list",
|
184
|
+
[PM_ERR_LIST_W_UPPER_TERM] = "Expected a closing delimiter for the `%W` list",
|
185
|
+
[PM_ERR_MALLOC_FAILED] = "Failed to allocate memory",
|
186
|
+
[PM_ERR_MODULE_IN_METHOD] = "Unexpected module definition in a method body",
|
187
|
+
[PM_ERR_MODULE_NAME] = "Expected a constant name after `module`",
|
188
|
+
[PM_ERR_MODULE_TERM] = "Expected an `end` to close the `module` statement",
|
189
|
+
[PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = "Multiple splats in multiple assignment",
|
190
|
+
[PM_ERR_NOT_EXPRESSION] = "Expected an expression after `not`",
|
191
|
+
[PM_ERR_NUMBER_LITERAL_UNDERSCORE] = "Number literal ending with a `_`",
|
192
|
+
[PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED] = "Numbered parameters are not allowed alongside explicit parameters",
|
193
|
+
[PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE] = "Numbered parameter is already used in outer scope",
|
194
|
+
[PM_ERR_OPERATOR_MULTI_ASSIGN] = "Unexpected operator for a multiple assignment",
|
195
|
+
[PM_ERR_OPERATOR_WRITE_BLOCK] = "Unexpected operator after a call with a block",
|
196
|
+
[PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI] = "Unexpected multiple `**` splat parameters",
|
197
|
+
[PM_ERR_PARAMETER_BLOCK_MULTI] = "Multiple block parameters; only one block is allowed",
|
198
|
+
[PM_ERR_PARAMETER_METHOD_NAME] = "Unexpected name for a parameter",
|
199
|
+
[PM_ERR_PARAMETER_NAME_REPEAT] = "Repeated parameter name",
|
200
|
+
[PM_ERR_PARAMETER_NO_DEFAULT] = "Expected a default value for the parameter",
|
201
|
+
[PM_ERR_PARAMETER_NO_DEFAULT_KW] = "Expected a default value for the keyword parameter",
|
202
|
+
[PM_ERR_PARAMETER_NUMBERED_RESERVED] = "Token reserved for a numbered parameter",
|
203
|
+
[PM_ERR_PARAMETER_ORDER] = "Unexpected parameter order",
|
204
|
+
[PM_ERR_PARAMETER_SPLAT_MULTI] = "Unexpected multiple `*` splat parameters",
|
205
|
+
[PM_ERR_PARAMETER_STAR] = "Unexpected parameter `*`",
|
206
|
+
[PM_ERR_PARAMETER_UNEXPECTED_FWD] = "Unexpected `...` in parameters",
|
207
|
+
[PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = "Unexpected `,` in parameters",
|
208
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = "Expected a pattern expression after the `[` operator",
|
209
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = "Expected a pattern expression after `,`",
|
210
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = "Expected a pattern expression after `=>`",
|
211
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_IN] = "Expected a pattern expression after the `in` keyword",
|
212
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_KEY] = "Expected a pattern expression after the key",
|
213
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN] = "Expected a pattern expression after the `(` operator",
|
214
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_PIN] = "Expected a pattern expression after the `^` pin operator",
|
215
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE] = "Expected a pattern expression after the `|` operator",
|
216
|
+
[PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = "Expected a pattern expression after the range operator",
|
217
|
+
[PM_ERR_PATTERN_HASH_KEY] = "Expected a key in the hash pattern",
|
218
|
+
[PM_ERR_PATTERN_HASH_KEY_LABEL] = "Expected a label as the key in the hash pattern", // TODO // THIS // AND // ABOVE // IS WEIRD
|
219
|
+
[PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = "Expected an identifier after the `=>` operator",
|
220
|
+
[PM_ERR_PATTERN_LABEL_AFTER_COMMA] = "Expected a label after the `,` in the hash pattern",
|
221
|
+
[PM_ERR_PATTERN_REST] = "Unexpected rest pattern",
|
222
|
+
[PM_ERR_PATTERN_TERM_BRACE] = "Expected a `}` to close the pattern expression",
|
223
|
+
[PM_ERR_PATTERN_TERM_BRACKET] = "Expected a `]` to close the pattern expression",
|
224
|
+
[PM_ERR_PATTERN_TERM_PAREN] = "Expected a `)` to close the pattern expression",
|
225
|
+
[PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = "Unexpected `||=` in a multiple assignment",
|
226
|
+
[PM_ERR_REGEXP_TERM] = "Expected a closing delimiter for the regular expression",
|
227
|
+
[PM_ERR_RESCUE_EXPRESSION] = "Expected a rescued expression",
|
228
|
+
[PM_ERR_RESCUE_MODIFIER_VALUE] = "Expected a value after the `rescue` modifier",
|
229
|
+
[PM_ERR_RESCUE_TERM] = "Expected a closing delimiter for the `rescue` clause",
|
230
|
+
[PM_ERR_RESCUE_VARIABLE] = "Expected an exception variable after `=>` in a rescue statement",
|
231
|
+
[PM_ERR_RETURN_INVALID] = "Invalid `return` in a class or module body",
|
232
|
+
[PM_ERR_STRING_CONCATENATION] = "Expected a string for concatenation",
|
233
|
+
[PM_ERR_STRING_INTERPOLATED_TERM] = "Expected a closing delimiter for the interpolated string",
|
234
|
+
[PM_ERR_STRING_LITERAL_TERM] = "Expected a closing delimiter for the string literal",
|
235
|
+
[PM_ERR_SYMBOL_INVALID] = "Invalid symbol", // TODO expected symbol? prism.c ~9719
|
236
|
+
[PM_ERR_SYMBOL_TERM_DYNAMIC] = "Expected a closing delimiter for the dynamic symbol",
|
237
|
+
[PM_ERR_SYMBOL_TERM_INTERPOLATED] = "Expected a closing delimiter for the interpolated symbol",
|
238
|
+
[PM_ERR_TERNARY_COLON] = "Expected a `:` after the true expression of a ternary operator",
|
239
|
+
[PM_ERR_TERNARY_EXPRESSION_FALSE] = "Expected an expression after `:` in the ternary operator",
|
240
|
+
[PM_ERR_TERNARY_EXPRESSION_TRUE] = "Expected an expression after `?` in the ternary operator",
|
241
|
+
[PM_ERR_UNDEF_ARGUMENT] = "Invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument",
|
242
|
+
[PM_ERR_UNARY_RECEIVER_BANG] = "Expected a receiver for unary `!`",
|
243
|
+
[PM_ERR_UNARY_RECEIVER_MINUS] = "Expected a receiver for unary `-`",
|
244
|
+
[PM_ERR_UNARY_RECEIVER_PLUS] = "Expected a receiver for unary `+`",
|
245
|
+
[PM_ERR_UNARY_RECEIVER_TILDE] = "Expected a receiver for unary `~`",
|
246
|
+
[PM_ERR_UNTIL_TERM] = "Expected an `end` to close the `until` statement",
|
247
|
+
[PM_ERR_WHILE_TERM] = "Expected an `end` to close the `while` statement",
|
248
|
+
[PM_ERR_WRITE_TARGET_READONLY] = "Immutable variable as a write target",
|
249
|
+
[PM_ERR_WRITE_TARGET_UNEXPECTED] = "Unexpected write target",
|
250
|
+
[PM_ERR_XSTRING_TERM] = "Expected a closing delimiter for the `%x` or backtick string",
|
251
|
+
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = "Ambiguous first argument; put parentheses or a space even after `-` operator",
|
252
|
+
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = "Ambiguous first argument; put parentheses or a space even after `+` operator",
|
253
|
+
[PM_WARN_AMBIGUOUS_PREFIX_STAR] = "Ambiguous `*` has been interpreted as an argument prefix",
|
254
|
+
[PM_WARN_AMBIGUOUS_SLASH] = "Ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator",
|
255
|
+
};
|
256
|
+
|
257
|
+
static const char*
|
258
|
+
pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
|
259
|
+
assert(diag_id < PM_DIAGNOSTIC_ID_LEN);
|
260
|
+
const char *message = diagnostic_messages[diag_id];
|
261
|
+
assert(message);
|
262
|
+
return message;
|
263
|
+
}
|
264
|
+
|
265
|
+
// Append an error to the given list of diagnostic.
|
266
|
+
bool
|
267
|
+
pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
268
|
+
pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) malloc(sizeof(pm_diagnostic_t));
|
269
|
+
if (diagnostic == NULL) return false;
|
270
|
+
|
271
|
+
*diagnostic = (pm_diagnostic_t) { .start = start, .end = end, .message = pm_diagnostic_message(diag_id) };
|
272
|
+
pm_list_append(list, (pm_list_node_t *) diagnostic);
|
273
|
+
return true;
|
274
|
+
}
|
275
|
+
|
276
|
+
// Deallocate the internal state of the given diagnostic list.
|
277
|
+
void
|
278
|
+
pm_diagnostic_list_free(pm_list_t *list) {
|
279
|
+
pm_list_node_t *node, *next;
|
280
|
+
|
281
|
+
for (node = list->head; node != NULL; node = next) {
|
282
|
+
next = node->next;
|
283
|
+
|
284
|
+
pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) node;
|
285
|
+
free(diagnostic);
|
286
|
+
}
|
287
|
+
}
|
data/src/enc/pm_big5.c
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
// Return the width in bytes (1 or 2) of the Big5 character starting at b,
// or 0 if the bytes do not form a character this function recognizes.
//
// b points at the first byte to inspect and n is the number of bytes that
// may be read from b. No byte is read unless n says it is available.
static size_t
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
    // Nothing can be read from an empty (or negative-length) range.
    if (n < 1) {
        return 0;
    }

    const uint8_t lead = b[0];

    // These are the single byte characters.
    if (lead < 0x80) {
        return 1;
    }

    // These are the double byte characters; they need a second byte.
    if (n > 1 && lead >= 0xA1 && lead <= 0xFE) {
        const uint8_t trail = b[1];

        if (trail >= 0x40 && trail <= 0xFE) {
            return 2;
        }
    }

    return 0;
}
|
17
|
+
|
18
|
+
// Alphabetic check for Big5: only characters that are one byte wide are
// handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_big5_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
|
26
|
+
|
27
|
+
// Alphanumeric check for Big5: only characters that are one byte wide are
// handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_big5_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
|
35
|
+
|
36
|
+
// Uppercase check for Big5: true only when the character is one byte wide
// and the ASCII classifier reports it as uppercase.
static bool
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // && short-circuits, so the ASCII check runs only for width-1 characters.
    return pm_encoding_big5_char_width(b, n) == 1 && pm_encoding_ascii_isupper_char(b, n);
}
|
44
|
+
|
45
|
+
pm_encoding_t pm_encoding_big5 = {
|
46
|
+
.name = "big5",
|
47
|
+
.char_width = pm_encoding_big5_char_width,
|
48
|
+
.alnum_char = pm_encoding_big5_alnum_char,
|
49
|
+
.alpha_char = pm_encoding_big5_alpha_char,
|
50
|
+
.isupper_char = pm_encoding_big5_isupper_char,
|
51
|
+
.multibyte = true
|
52
|
+
};
|
data/src/enc/pm_euc_jp.c
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
// Return the width in bytes (1 or 2) of the EUC-JP character starting at b,
// or 0 if the bytes do not form a character this function recognizes.
//
// b points at the first byte to inspect and n is the number of bytes that
// may be read from b. No byte is read unless n says it is available.
static size_t
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
    // Nothing can be read from an empty (or negative-length) range.
    if (n < 1) {
        return 0;
    }

    const uint8_t lead = b[0];

    // These are the single byte characters.
    if (lead < 0x80) {
        return 1;
    }

    // These are the double byte characters; they need a second byte.
    if (n > 1) {
        const uint8_t trail = b[1];

        // Both accepted forms share the same trailing byte range; the lead
        // byte is either 0x8E or falls in 0xA1..0xFE.
        if (
            (trail >= 0xA1 && trail <= 0xFE) &&
            (lead == 0x8E || (lead >= 0xA1 && lead <= 0xFE))
        ) {
            return 2;
        }
    }

    return 0;
}
|
23
|
+
|
24
|
+
// Alphabetic check for EUC-JP: only characters that are one byte wide are
// handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
|
32
|
+
|
33
|
+
// Alphanumeric check for EUC-JP: only characters that are one byte wide are
// handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
|
41
|
+
|
42
|
+
// Uppercase check for EUC-JP: true only when the character is one byte wide
// and the ASCII classifier reports it as uppercase.
static bool
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // && short-circuits, so the ASCII check runs only for width-1 characters.
    return pm_encoding_euc_jp_char_width(b, n) == 1 && pm_encoding_ascii_isupper_char(b, n);
}
|
50
|
+
|
51
|
+
pm_encoding_t pm_encoding_euc_jp = {
|
52
|
+
.name = "euc-jp",
|
53
|
+
.char_width = pm_encoding_euc_jp_char_width,
|
54
|
+
.alnum_char = pm_encoding_euc_jp_alnum_char,
|
55
|
+
.alpha_char = pm_encoding_euc_jp_alpha_char,
|
56
|
+
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
57
|
+
.multibyte = true
|
58
|
+
};
|
data/src/enc/pm_gbk.c
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
// Return the width in bytes (1 or 2) of the GBK character starting at b,
// or 0 if the bytes do not form a character this function recognizes.
//
// b points at the first byte to inspect and n is the number of bytes that
// may be read from b. No byte is read unless n says it is available.
static size_t
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
    // Nothing can be read from an empty (or negative-length) range.
    if (n < 1) {
        return 0;
    }

    const uint8_t lead = b[0];

    // These are the single byte characters.
    if (lead < 0x80) {
        return 1;
    }

    // These are the double byte characters; they need a second byte.
    if (n > 1) {
        const uint8_t trail = b[1];

        if (
            ((lead >= 0xA1 && lead <= 0xA9) && (trail >= 0xA1 && trail <= 0xFE)) || // GBK/1
            ((lead >= 0xB0 && lead <= 0xF7) && (trail >= 0xA1 && trail <= 0xFE)) || // GBK/2
            ((lead >= 0x81 && lead <= 0xA0) && (trail >= 0x40 && trail <= 0xFE) && (trail != 0x7F)) || // GBK/3
            ((lead >= 0xAA && lead <= 0xFE) && (trail >= 0x40 && trail <= 0xA0) && (trail != 0x7F)) || // GBK/4
            ((lead >= 0xA8 && lead <= 0xA9) && (trail >= 0x40 && trail <= 0xA0) && (trail != 0x7F)) // GBK/5
        ) {
            return 2;
        }
    }

    return 0;
}
|
26
|
+
|
27
|
+
// Alphabetic check for GBK: only characters that are one byte wide are
// handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_gbk_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
|
35
|
+
|
36
|
+
// Alphanumeric check for GBK: only characters that are one byte wide are
// handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_gbk_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
|
44
|
+
|
45
|
+
// Uppercase check for GBK: true only when the character is one byte wide
// and the ASCII classifier reports it as uppercase.
static bool
pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // && short-circuits, so the ASCII check runs only for width-1 characters.
    return pm_encoding_gbk_char_width(b, n) == 1 && pm_encoding_ascii_isupper_char(b, n);
}
|
53
|
+
|
54
|
+
pm_encoding_t pm_encoding_gbk = {
|
55
|
+
.name = "gbk",
|
56
|
+
.char_width = pm_encoding_gbk_char_width,
|
57
|
+
.alnum_char = pm_encoding_gbk_alnum_char,
|
58
|
+
.alpha_char = pm_encoding_gbk_alpha_char,
|
59
|
+
.isupper_char = pm_encoding_gbk_isupper_char,
|
60
|
+
.multibyte = true
|
61
|
+
};
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
// Return the width in bytes (1 or 2) of the Shift_JIS character starting at
// b, or 0 if the bytes do not form a character this function recognizes.
//
// b points at the first byte to inspect and n is the number of bytes that
// may be read from b. No byte is read unless n says it is available.
static size_t
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
    // Nothing can be read from an empty (or negative-length) range.
    if (n < 1) {
        return 0;
    }

    const uint8_t lead = b[0];

    // These are the single byte characters.
    if (lead < 0x80 || (lead >= 0xA1 && lead <= 0xDF)) {
        return 1;
    }

    // These are the double byte characters; they need a second byte.
    if (n > 1 && ((lead >= 0x81 && lead <= 0x9F) || (lead >= 0xE0 && lead <= 0xFC))) {
        const uint8_t trail = b[1];

        if (trail >= 0x40 && trail <= 0xFC) {
            return 2;
        }
    }

    return 0;
}
|
21
|
+
|
22
|
+
// Alphabetic check for Shift_JIS: only characters that are one byte wide
// are handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_shift_jis_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
|
30
|
+
|
31
|
+
// Alphanumeric check for Shift_JIS: only characters that are one byte wide
// are handed to the ASCII classifier; anything else yields 0.
static size_t
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_shift_jis_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
|
39
|
+
|
40
|
+
// Uppercase check for Shift_JIS: true only when the character is one byte
// wide and the ASCII classifier reports it as uppercase.
static bool
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // && short-circuits, so the ASCII check runs only for width-1 characters.
    return pm_encoding_shift_jis_char_width(b, n) == 1 && pm_encoding_ascii_isupper_char(b, n);
}
|
48
|
+
|
49
|
+
pm_encoding_t pm_encoding_shift_jis = {
|
50
|
+
.name = "shift_jis",
|
51
|
+
.char_width = pm_encoding_shift_jis_char_width,
|
52
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
53
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
54
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
55
|
+
.multibyte = true
|
56
|
+
};
|