prism 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
data/src/diagnostic.c ADDED
@@ -0,0 +1,287 @@
1
+ #include "prism/diagnostic.h"
2
+
3
+ /*
4
+ ## Message composition
5
+
6
+ When composing an error message, use sentence fragments.
7
+
8
+ Try describing the property of the code that caused the error, rather than the rule that is being
9
+ violated. It may help to use a fragment that completes a sentence beginning, "The parser
10
+ encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
11
+ context) after a semicolon.
12
+
13
+ For example, instead of "Control escape sequence cannot be doubled", prefer:
14
+
15
+ > "Invalid control escape sequence; control cannot be repeated"
16
+
17
+ In some cases, where the failure is more general or syntax expectations are violated, it may make
18
+ more sense to use a fragment that completes a sentence beginning, "The parser ...".
19
+
20
+ For example:
21
+
22
+ > "Expected an expression after `(`"
23
+ > "Cannot parse the expression"
24
+
25
+
26
+ ## Message style guide
27
+
28
+ - Use articles like "a", "an", and "the" when appropriate.
29
+ - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
30
+ - Use the common name for tokens and nodes.
31
+ - e.g., prefer "keyword splat" to "assoc splat"
32
+ - e.g., prefer "embedded document" to "embdoc"
33
+ - Capitalize the initial word of the message.
34
+ - Use back ticks around token literals
35
+ - e.g., "Expected a `=>` between the hash key and value"
36
+ - Do not use `.` or other punctuation at the end of the message.
37
+ - Do not use contractions like "can't". Prefer "cannot" to "can not".
38
+ - For tokens that can have multiple meanings, reference the token and its meaning.
39
+ - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
40
+
41
+
42
+ ## Error names (PM_ERR_*)
43
+
44
+ - When appropriate, prefer node name to token name.
45
+ - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
46
+ - Prefer token name to common name.
47
+ - e.g., prefer "STAR" to "ASTERISK".
48
+ - Try to order the words in the name from more general to more specific,
49
+ - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
50
+ - When in doubt, look for similar patterns and name them so that they are grouped when lexically
51
+ sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
52
+ */
53
+
54
+ static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
55
+ [PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
56
+ [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
57
+ [PM_ERR_ARGUMENT_AFTER_BLOCK] = "Unexpected argument after a block argument",
58
+ [PM_ERR_ARGUMENT_BARE_HASH] = "Unexpected bare hash argument",
59
+ [PM_ERR_ARGUMENT_BLOCK_MULTI] = "Multiple block arguments; only one block is allowed",
60
+ [PM_ERR_ARGUMENT_FORMAL_CLASS] = "Invalid formal argument; formal argument cannot be a class variable",
61
+ [PM_ERR_ARGUMENT_FORMAL_CONSTANT] = "Invalid formal argument; formal argument cannot be a constant",
62
+ [PM_ERR_ARGUMENT_FORMAL_GLOBAL] = "Invalid formal argument; formal argument cannot be a global variable",
63
+ [PM_ERR_ARGUMENT_FORMAL_IVAR] = "Invalid formal argument; formal argument cannot be an instance variable",
64
+ [PM_ERR_ARGUMENT_NO_FORWARDING_AMP] = "Unexpected `&` when the parent method is not forwarding",
65
+ [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = "Unexpected `...` when the parent method is not forwarding",
66
+ [PM_ERR_ARGUMENT_NO_FORWARDING_STAR] = "Unexpected `*` when the parent method is not forwarding",
67
+ [PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = "Unexpected `*` splat argument after a `**` keyword splat argument",
68
+ [PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT] = "Unexpected `*` splat argument after a `*` splat argument",
69
+ [PM_ERR_ARGUMENT_TERM_PAREN] = "Expected a `)` to close the arguments",
70
+ [PM_ERR_ARGUMENT_UNEXPECTED_BLOCK] = "Unexpected `{` after a method call without parenthesis",
71
+ [PM_ERR_ARRAY_ELEMENT] = "Expected an element for the array",
72
+ [PM_ERR_ARRAY_EXPRESSION] = "Expected an expression for the array element",
73
+ [PM_ERR_ARRAY_EXPRESSION_AFTER_STAR] = "Expected an expression after `*` in the array",
74
+ [PM_ERR_ARRAY_SEPARATOR] = "Expected a `,` separator for the array elements",
75
+ [PM_ERR_ARRAY_TERM] = "Expected a `]` to close the array",
76
+ [PM_ERR_BEGIN_LONELY_ELSE] = "Unexpected `else` in `begin` block; a `rescue` clause must precede `else`",
77
+ [PM_ERR_BEGIN_TERM] = "Expected an `end` to close the `begin` statement",
78
+ [PM_ERR_BEGIN_UPCASE_BRACE] = "Expected a `{` after `BEGIN`",
79
+ [PM_ERR_BEGIN_UPCASE_TERM] = "Expected a `}` to close the `BEGIN` statement",
80
+ [PM_ERR_BEGIN_UPCASE_TOPLEVEL] = "BEGIN is permitted only at toplevel",
81
+ [PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE] = "Expected a local variable name in the block parameters",
82
+ [PM_ERR_BLOCK_PARAM_PIPE_TERM] = "Expected the block parameters to end with `|`",
83
+ [PM_ERR_BLOCK_TERM_BRACE] = "Expected a block beginning with `{` to end with `}`",
84
+ [PM_ERR_BLOCK_TERM_END] = "Expected a block beginning with `do` to end with `end`",
85
+ [PM_ERR_CANNOT_PARSE_EXPRESSION] = "Cannot parse the expression",
86
+ [PM_ERR_CANNOT_PARSE_STRING_PART] = "Cannot parse the string part",
87
+ [PM_ERR_CASE_EXPRESSION_AFTER_CASE] = "Expected an expression after `case`",
88
+ [PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = "Expected an expression after `when`",
89
+ [PM_ERR_CASE_MISSING_CONDITIONS] = "Expected a `when` or `in` clause after `case`",
90
+ [PM_ERR_CASE_TERM] = "Expected an `end` to close the `case` statement",
91
+ [PM_ERR_CLASS_IN_METHOD] = "Unexpected class definition in a method body",
92
+ [PM_ERR_CLASS_NAME] = "Expected a constant name after `class`",
93
+ [PM_ERR_CLASS_SUPERCLASS] = "Expected a superclass after `<`",
94
+ [PM_ERR_CLASS_TERM] = "Expected an `end` to close the `class` statement",
95
+ [PM_ERR_CLASS_UNEXPECTED_END] = "Unexpected `end`, expecting ';' or '\n'",
96
+ [PM_ERR_CONDITIONAL_ELSIF_PREDICATE] = "Expected a predicate expression for the `elsif` statement",
97
+ [PM_ERR_CONDITIONAL_IF_PREDICATE] = "Expected a predicate expression for the `if` statement",
98
+ [PM_ERR_CONDITIONAL_PREDICATE_TERM] = "Expected `then` or `;` or '\n'",
99
+ [PM_ERR_CONDITIONAL_TERM] = "Expected an `end` to close the conditional clause",
100
+ [PM_ERR_CONDITIONAL_TERM_ELSE] = "Expected an `end` to close the `else` clause",
101
+ [PM_ERR_CONDITIONAL_UNLESS_PREDICATE] = "Expected a predicate expression for the `unless` statement",
102
+ [PM_ERR_CONDITIONAL_UNTIL_PREDICATE] = "Expected a predicate expression for the `until` statement",
103
+ [PM_ERR_CONDITIONAL_WHILE_PREDICATE] = "Expected a predicate expression for the `while` statement",
104
+ [PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = "Expected a constant after the `::` operator",
105
+ [PM_ERR_DEF_ENDLESS] = "Could not parse the endless method body",
106
+ [PM_ERR_DEF_ENDLESS_SETTER] = "Invalid method name; a setter method cannot be defined in an endless method definition",
107
+ [PM_ERR_DEF_NAME] = "Expected a method name",
108
+ [PM_ERR_DEF_NAME_AFTER_RECEIVER] = "Expected a method name after the receiver",
109
+ [PM_ERR_DEF_PARAMS_TERM] = "Expected a delimiter to close the parameters",
110
+ [PM_ERR_DEF_PARAMS_TERM_PAREN] = "Expected a `)` to close the parameters",
111
+ [PM_ERR_DEF_RECEIVER] = "Expected a receiver for the method definition",
112
+ [PM_ERR_DEF_RECEIVER_TERM] = "Expected a `.` or `::` after the receiver in a method definition",
113
+ [PM_ERR_DEF_TERM] = "Expected an `end` to close the `def` statement",
114
+ [PM_ERR_DEFINED_EXPRESSION] = "Expected an expression after `defined?`",
115
+ [PM_ERR_EMBDOC_TERM] = "Could not find a terminator for the embedded document",
116
+ [PM_ERR_EMBEXPR_END] = "Expected a `}` to close the embedded expression",
117
+ [PM_ERR_EMBVAR_INVALID] = "Invalid embedded variable",
118
+ [PM_ERR_END_UPCASE_BRACE] = "Expected a `{` after `END`",
119
+ [PM_ERR_END_UPCASE_TERM] = "Expected a `}` to close the `END` statement",
120
+ [PM_ERR_ESCAPE_INVALID_CONTROL] = "Invalid control escape sequence",
121
+ [PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT] = "Invalid control escape sequence; control cannot be repeated",
122
+ [PM_ERR_ESCAPE_INVALID_HEXADECIMAL] = "Invalid hexadecimal escape sequence",
123
+ [PM_ERR_ESCAPE_INVALID_META] = "Invalid meta escape sequence",
124
+ [PM_ERR_ESCAPE_INVALID_META_REPEAT] = "Invalid meta escape sequence; meta cannot be repeated",
125
+ [PM_ERR_ESCAPE_INVALID_UNICODE] = "Invalid Unicode escape sequence",
126
+ [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = "Invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags",
127
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = "Invalid Unicode escape sequence; multiple codepoints are not allowed in a character literal",
128
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = "Invalid Unicode escape sequence; maximum length is 6 digits",
129
+ [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = "Invalid Unicode escape sequence; needs closing `}`",
130
+ [PM_ERR_EXPECT_ARGUMENT] = "Expected an argument",
131
+ [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = "Expected a newline or semicolon after the statement",
132
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = "Expected an expression after `&&=`",
133
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = "Expected an expression after `||=`",
134
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = "Expected an expression after `,`",
135
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL] = "Expected an expression after `=`",
136
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS] = "Expected an expression after `<<`",
137
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN] = "Expected an expression after `(`",
138
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR] = "Expected an expression after the operator",
139
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT] = "Expected an expression after `*` splat in an argument",
140
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = "Expected an expression after `**` in a hash",
141
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_STAR] = "Expected an expression after `*`",
142
+ [PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = "Expected an identifier for the required parameter",
143
+ [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = "Expected a `(` to start a required parameter",
144
+ [PM_ERR_EXPECT_RBRACKET] = "Expected a matching `]`",
145
+ [PM_ERR_EXPECT_RPAREN] = "Expected a matching `)`",
146
+ [PM_ERR_EXPECT_RPAREN_AFTER_MULTI] = "Expected a `)` after multiple assignment",
147
+ [PM_ERR_EXPECT_RPAREN_REQ_PARAMETER] = "Expected a `)` to end a required parameter",
148
+ [PM_ERR_EXPECT_STRING_CONTENT] = "Expected string content after opening string delimiter",
149
+ [PM_ERR_EXPECT_WHEN_DELIMITER] = "Expected a delimiter after the predicates of a `when` clause",
150
+ [PM_ERR_EXPRESSION_BARE_HASH] = "Unexpected bare hash in expression",
151
+ [PM_ERR_FOR_COLLECTION] = "Expected a collection after the `in` in a `for` statement",
152
+ [PM_ERR_FOR_INDEX] = "Expected an index after `for`",
153
+ [PM_ERR_FOR_IN] = "Expected an `in` after the index in a `for` statement",
154
+ [PM_ERR_FOR_TERM] = "Expected an `end` to close the `for` loop",
155
+ [PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = "Expected an expression after the label in a hash",
156
+ [PM_ERR_HASH_KEY] = "Expected a key in the hash literal",
157
+ [PM_ERR_HASH_ROCKET] = "Expected a `=>` between the hash key and value",
158
+ [PM_ERR_HASH_TERM] = "Expected a `}` to close the hash literal",
159
+ [PM_ERR_HASH_VALUE] = "Expected a value in the hash literal",
160
+ [PM_ERR_HEREDOC_TERM] = "Could not find a terminator for the heredoc",
161
+ [PM_ERR_INCOMPLETE_QUESTION_MARK] = "Incomplete expression at `?`",
162
+ [PM_ERR_INCOMPLETE_VARIABLE_CLASS] = "Incomplete class variable",
163
+ [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = "Incomplete instance variable",
164
+ [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = "Unknown or invalid encoding in the magic comment",
165
+ [PM_ERR_INVALID_FLOAT_EXPONENT] = "Invalid exponent",
166
+ [PM_ERR_INVALID_NUMBER_BINARY] = "Invalid binary number",
167
+ [PM_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number",
168
+ [PM_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number",
169
+ [PM_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number",
170
+ [PM_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number",
171
+ [PM_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT?
172
+ [PM_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT?
173
+ [PM_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable",
174
+ [PM_ERR_LAMBDA_OPEN] = "Expected a `do` keyword or a `{` to open the lambda block",
175
+ [PM_ERR_LAMBDA_TERM_BRACE] = "Expected a lambda block beginning with `{` to end with `}`",
176
+ [PM_ERR_LAMBDA_TERM_END] = "Expected a lambda block beginning with `do` to end with `end`",
177
+ [PM_ERR_LIST_I_LOWER_ELEMENT] = "Expected a symbol in a `%i` list",
178
+ [PM_ERR_LIST_I_LOWER_TERM] = "Expected a closing delimiter for the `%i` list",
179
+ [PM_ERR_LIST_I_UPPER_ELEMENT] = "Expected a symbol in a `%I` list",
180
+ [PM_ERR_LIST_I_UPPER_TERM] = "Expected a closing delimiter for the `%I` list",
181
+ [PM_ERR_LIST_W_LOWER_ELEMENT] = "Expected a string in a `%w` list",
182
+ [PM_ERR_LIST_W_LOWER_TERM] = "Expected a closing delimiter for the `%w` list",
183
+ [PM_ERR_LIST_W_UPPER_ELEMENT] = "Expected a string in a `%W` list",
184
+ [PM_ERR_LIST_W_UPPER_TERM] = "Expected a closing delimiter for the `%W` list",
185
+ [PM_ERR_MALLOC_FAILED] = "Failed to allocate memory",
186
+ [PM_ERR_MODULE_IN_METHOD] = "Unexpected module definition in a method body",
187
+ [PM_ERR_MODULE_NAME] = "Expected a constant name after `module`",
188
+ [PM_ERR_MODULE_TERM] = "Expected an `end` to close the `module` statement",
189
+ [PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = "Multiple splats in multiple assignment",
190
+ [PM_ERR_NOT_EXPRESSION] = "Expected an expression after `not`",
191
+ [PM_ERR_NUMBER_LITERAL_UNDERSCORE] = "Number literal ending with a `_`",
192
+ [PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED] = "Numbered parameters are not allowed alongside explicit parameters",
193
+ [PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE] = "Numbered parameter is already used in outer scope",
194
+ [PM_ERR_OPERATOR_MULTI_ASSIGN] = "Unexpected operator for a multiple assignment",
195
+ [PM_ERR_OPERATOR_WRITE_BLOCK] = "Unexpected operator after a call with a block",
196
+ [PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI] = "Unexpected multiple `**` splat parameters",
197
+ [PM_ERR_PARAMETER_BLOCK_MULTI] = "Multiple block parameters; only one block is allowed",
198
+ [PM_ERR_PARAMETER_METHOD_NAME] = "Unexpected name for a parameter",
199
+ [PM_ERR_PARAMETER_NAME_REPEAT] = "Repeated parameter name",
200
+ [PM_ERR_PARAMETER_NO_DEFAULT] = "Expected a default value for the parameter",
201
+ [PM_ERR_PARAMETER_NO_DEFAULT_KW] = "Expected a default value for the keyword parameter",
202
+ [PM_ERR_PARAMETER_NUMBERED_RESERVED] = "Token reserved for a numbered parameter",
203
+ [PM_ERR_PARAMETER_ORDER] = "Unexpected parameter order",
204
+ [PM_ERR_PARAMETER_SPLAT_MULTI] = "Unexpected multiple `*` splat parameters",
205
+ [PM_ERR_PARAMETER_STAR] = "Unexpected parameter `*`",
206
+ [PM_ERR_PARAMETER_UNEXPECTED_FWD] = "Unexpected `...` in parameters",
207
+ [PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = "Unexpected `,` in parameters",
208
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = "Expected a pattern expression after the `[` operator",
209
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = "Expected a pattern expression after `,`",
210
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = "Expected a pattern expression after `=>`",
211
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_IN] = "Expected a pattern expression after the `in` keyword",
212
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_KEY] = "Expected a pattern expression after the key",
213
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN] = "Expected a pattern expression after the `(` operator",
214
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PIN] = "Expected a pattern expression after the `^` pin operator",
215
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE] = "Expected a pattern expression after the `|` operator",
216
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = "Expected a pattern expression after the range operator",
217
+ [PM_ERR_PATTERN_HASH_KEY] = "Expected a key in the hash pattern",
218
+ [PM_ERR_PATTERN_HASH_KEY_LABEL] = "Expected a label as the key in the hash pattern", // TODO // THIS // AND // ABOVE // IS WEIRD
219
+ [PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = "Expected an identifier after the `=>` operator",
220
+ [PM_ERR_PATTERN_LABEL_AFTER_COMMA] = "Expected a label after the `,` in the hash pattern",
221
+ [PM_ERR_PATTERN_REST] = "Unexpected rest pattern",
222
+ [PM_ERR_PATTERN_TERM_BRACE] = "Expected a `}` to close the pattern expression",
223
+ [PM_ERR_PATTERN_TERM_BRACKET] = "Expected a `]` to close the pattern expression",
224
+ [PM_ERR_PATTERN_TERM_PAREN] = "Expected a `)` to close the pattern expression",
225
+ [PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = "Unexpected `||=` in a multiple assignment",
226
+ [PM_ERR_REGEXP_TERM] = "Expected a closing delimiter for the regular expression",
227
+ [PM_ERR_RESCUE_EXPRESSION] = "Expected a rescued expression",
228
+ [PM_ERR_RESCUE_MODIFIER_VALUE] = "Expected a value after the `rescue` modifier",
229
+ [PM_ERR_RESCUE_TERM] = "Expected a closing delimiter for the `rescue` clause",
230
+ [PM_ERR_RESCUE_VARIABLE] = "Expected an exception variable after `=>` in a rescue statement",
231
+ [PM_ERR_RETURN_INVALID] = "Invalid `return` in a class or module body",
232
+ [PM_ERR_STRING_CONCATENATION] = "Expected a string for concatenation",
233
+ [PM_ERR_STRING_INTERPOLATED_TERM] = "Expected a closing delimiter for the interpolated string",
234
+ [PM_ERR_STRING_LITERAL_TERM] = "Expected a closing delimiter for the string literal",
235
+ [PM_ERR_SYMBOL_INVALID] = "Invalid symbol", // TODO expected symbol? prism.c ~9719
236
+ [PM_ERR_SYMBOL_TERM_DYNAMIC] = "Expected a closing delimiter for the dynamic symbol",
237
+ [PM_ERR_SYMBOL_TERM_INTERPOLATED] = "Expected a closing delimiter for the interpolated symbol",
238
+ [PM_ERR_TERNARY_COLON] = "Expected a `:` after the true expression of a ternary operator",
239
+ [PM_ERR_TERNARY_EXPRESSION_FALSE] = "Expected an expression after `:` in the ternary operator",
240
+ [PM_ERR_TERNARY_EXPRESSION_TRUE] = "Expected an expression after `?` in the ternary operator",
241
+ [PM_ERR_UNDEF_ARGUMENT] = "Invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument",
242
+ [PM_ERR_UNARY_RECEIVER_BANG] = "Expected a receiver for unary `!`",
243
+ [PM_ERR_UNARY_RECEIVER_MINUS] = "Expected a receiver for unary `-`",
244
+ [PM_ERR_UNARY_RECEIVER_PLUS] = "Expected a receiver for unary `+`",
245
+ [PM_ERR_UNARY_RECEIVER_TILDE] = "Expected a receiver for unary `~`",
246
+ [PM_ERR_UNTIL_TERM] = "Expected an `end` to close the `until` statement",
247
+ [PM_ERR_WHILE_TERM] = "Expected an `end` to close the `while` statement",
248
+ [PM_ERR_WRITE_TARGET_READONLY] = "Immutable variable as a write target",
249
+ [PM_ERR_WRITE_TARGET_UNEXPECTED] = "Unexpected write target",
250
+ [PM_ERR_XSTRING_TERM] = "Expected a closing delimiter for the `%x` or backtick string",
251
+ [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = "Ambiguous first argument; put parentheses or a space even after `-` operator",
252
+ [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = "Ambiguous first argument; put parentheses or a space even after `+` operator",
253
+ [PM_WARN_AMBIGUOUS_PREFIX_STAR] = "Ambiguous `*` has been interpreted as an argument prefix",
254
+ [PM_WARN_AMBIGUOUS_SLASH] = "Ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator",
255
+ };
256
+
257
+ static const char*
258
+ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
259
+ assert(diag_id < PM_DIAGNOSTIC_ID_LEN);
260
+ const char *message = diagnostic_messages[diag_id];
261
+ assert(message);
262
+ return message;
263
+ }
264
+
265
+ // Append an error to the given list of diagnostic.
266
+ bool
267
+ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
268
+ pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) malloc(sizeof(pm_diagnostic_t));
269
+ if (diagnostic == NULL) return false;
270
+
271
+ *diagnostic = (pm_diagnostic_t) { .start = start, .end = end, .message = pm_diagnostic_message(diag_id) };
272
+ pm_list_append(list, (pm_list_node_t *) diagnostic);
273
+ return true;
274
+ }
275
+
276
+ // Deallocate the internal state of the given diagnostic list.
277
+ void
278
+ pm_diagnostic_list_free(pm_list_t *list) {
279
+ pm_list_node_t *node, *next;
280
+
281
+ for (node = list->head; node != NULL; node = next) {
282
+ next = node->next;
283
+
284
+ pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) node;
285
+ free(diagnostic);
286
+ }
287
+ }
data/src/enc/pm_big5.c ADDED
@@ -0,0 +1,52 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
/*
 * Return the width in bytes of the Big5 character starting at b, given that n bytes are
 * available, or 0 if the bytes do not form a valid character.
 *
 * A double-byte character has a lead byte in 0xA1..0xFE and a trail byte in 0x40..0x7E or
 * 0xA1..0xFE. Trail bytes in the gap 0x7F..0xA0 are rejected.
 */
static size_t
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
    if (b[0] < 0x80) {
        return 1;
    }

    // These are the double byte characters. The second byte is only read when it is available.
    if (
        (n > 1) &&
        (b[0] >= 0xA1 && b[0] <= 0xFE) &&
        ((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))
    ) {
        return 2;
    }

    return 0;
}
17
+
18
/*
 * Return the result of the ASCII alphabetic check when the character at b is one byte wide in
 * Big5, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_big5_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
26
+
27
/*
 * Return the result of the ASCII alphanumeric check when the character at b is one byte wide in
 * Big5, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_big5_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
35
+
36
/*
 * Return the result of the ASCII uppercase check when the character at b is one byte wide in
 * Big5, and false for every other width (including invalid sequences).
 */
static bool
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_big5_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
44
+
45
+ pm_encoding_t pm_encoding_big5 = {
46
+ .name = "big5",
47
+ .char_width = pm_encoding_big5_char_width,
48
+ .alnum_char = pm_encoding_big5_alnum_char,
49
+ .alpha_char = pm_encoding_big5_alpha_char,
50
+ .isupper_char = pm_encoding_big5_isupper_char,
51
+ .multibyte = true
52
+ };
@@ -0,0 +1,58 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
/*
 * Return the width in bytes of the EUC-JP character starting at b, given that n bytes are
 * available, or 0 if the bytes do not form a valid character.
 *
 * Recognized forms:
 *   - one byte below 0x80
 *   - two bytes: lead 0x8E or 0xA1..0xFE, then a byte in 0xA1..0xFE
 *   - three bytes: lead 0x8F, then two bytes each in 0xA1..0xFE
 */
static size_t
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
    if (b[0] < 0x80) {
        return 1;
    }

    // These are the double byte characters.
    if (
        (n > 1) &&
        ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) &&
        (b[1] >= 0xA1 && b[1] <= 0xFE)
    ) {
        return 2;
    }

    // These are the triple byte characters. Later bytes are only read when they are available.
    if (
        (n > 2) &&
        (b[0] == 0x8F) &&
        (b[1] >= 0xA1 && b[1] <= 0xFE) &&
        (b[2] >= 0xA1 && b[2] <= 0xFE)
    ) {
        return 3;
    }

    return 0;
}
23
+
24
/*
 * Return the result of the ASCII alphabetic check when the character at b is one byte wide in
 * EUC-JP, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
32
+
33
/*
 * Return the result of the ASCII alphanumeric check when the character at b is one byte wide in
 * EUC-JP, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
41
+
42
/*
 * Return the result of the ASCII uppercase check when the character at b is one byte wide in
 * EUC-JP, and false for every other width (including invalid sequences).
 */
static bool
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
50
+
51
+ pm_encoding_t pm_encoding_euc_jp = {
52
+ .name = "euc-jp",
53
+ .char_width = pm_encoding_euc_jp_char_width,
54
+ .alnum_char = pm_encoding_euc_jp_alnum_char,
55
+ .alpha_char = pm_encoding_euc_jp_alpha_char,
56
+ .isupper_char = pm_encoding_euc_jp_isupper_char,
57
+ .multibyte = true
58
+ };
data/src/enc/pm_gbk.c ADDED
@@ -0,0 +1,61 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
/*
 * Return the width in bytes of the GBK character starting at b, given that n bytes are available,
 * or 0 if the bytes do not form a character in one of the GBK/1 through GBK/5 regions.
 */
static size_t
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // These are the single byte characters.
    if (lead < 0x80) {
        return 1;
    }

    // Every remaining form needs a second byte, which is only read when it is available.
    if (n <= 1) {
        return 0;
    }

    const uint8_t trail = b[1];

    // The three trail byte ranges shared by the regions below.
    const bool trail_high = (trail >= 0xA1 && trail <= 0xFE);
    const bool trail_low = (trail >= 0x40 && trail <= 0xA0 && trail != 0x7F);
    const bool trail_full = (trail >= 0x40 && trail <= 0xFE && trail != 0x7F);

    const bool gbk1 = (lead >= 0xA1 && lead <= 0xA9) && trail_high;
    const bool gbk2 = (lead >= 0xB0 && lead <= 0xF7) && trail_high;
    const bool gbk3 = (lead >= 0x81 && lead <= 0xA0) && trail_full;
    const bool gbk4 = (lead >= 0xAA && lead <= 0xFE) && trail_low;
    const bool gbk5 = (lead >= 0xA8 && lead <= 0xA9) && trail_low;

    return (gbk1 || gbk2 || gbk3 || gbk4 || gbk5) ? 2 : 0;
}
26
+
27
/*
 * Return the result of the ASCII alphabetic check when the character at b is one byte wide in
 * GBK, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_gbk_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
35
+
36
/*
 * Return the result of the ASCII alphanumeric check when the character at b is one byte wide in
 * GBK, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_gbk_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
44
+
45
/*
 * Return the result of the ASCII uppercase check when the character at b is one byte wide in
 * GBK, and false for every other width (including invalid sequences).
 */
static bool
pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_gbk_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
53
+
54
+ pm_encoding_t pm_encoding_gbk = {
55
+ .name = "gbk",
56
+ .char_width = pm_encoding_gbk_char_width,
57
+ .alnum_char = pm_encoding_gbk_alnum_char,
58
+ .alpha_char = pm_encoding_gbk_alpha_char,
59
+ .isupper_char = pm_encoding_gbk_isupper_char,
60
+ .multibyte = true
61
+ };
@@ -0,0 +1,56 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
/*
 * Return the width in bytes of the Shift_JIS character starting at b, given that n bytes are
 * available, or 0 if the bytes do not form a valid character.
 *
 * Single-byte characters are bytes below 0x80 and bytes in 0xA1..0xDF. A double-byte character
 * has a lead byte in 0x81..0x9F or 0xE0..0xFC and a trail byte in 0x40..0xFC other than 0x7F.
 */
static size_t
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
    if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
        return 1;
    }

    // These are the double byte characters. 0x7F is never a valid trail byte.
    if (
        (n > 1) &&
        ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
        (b[1] >= 0x40 && b[1] <= 0xFC && b[1] != 0x7F)
    ) {
        return 2;
    }

    return 0;
}
21
+
22
/*
 * Return the result of the ASCII alphabetic check when the character at b is one byte wide in
 * Shift_JIS, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_shift_jis_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
30
+
31
/*
 * Return the result of the ASCII alphanumeric check when the character at b is one byte wide in
 * Shift_JIS, and 0 for every other width (including invalid sequences).
 */
static size_t
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_shift_jis_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
39
+
40
/*
 * Return the result of the ASCII uppercase check when the character at b is one byte wide in
 * Shift_JIS, and false for every other width (including invalid sequences).
 */
static bool
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_shift_jis_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
48
+
49
+ pm_encoding_t pm_encoding_shift_jis = {
50
+ .name = "shift_jis",
51
+ .char_width = pm_encoding_shift_jis_char_width,
52
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
53
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
54
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
55
+ .multibyte = true
56
+ };