prism 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
data/src/enc/pm_unicode.c
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
-
// Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
|
2
|
-
// decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
3
|
-
|
4
1
|
#include "prism/enc/pm_encoding.h"
|
5
2
|
|
6
3
|
typedef uint32_t pm_unicode_codepoint_t;
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
5
|
+
/**
|
6
|
+
* Each element of the following table contains a bitfield that indicates a
|
7
|
+
* piece of information about the corresponding unicode codepoint. Note that
|
8
|
+
* this table is different from other encodings where we used a lookup table
|
9
|
+
* because the indices of those tables are the byte representations, not the
|
10
|
+
* codepoints themselves.
|
11
|
+
*/
|
13
12
|
const uint8_t pm_encoding_unicode_table[256] = {
|
14
13
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
15
14
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -2179,6 +2178,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
|
|
2179
2178
|
0x1F170, 0x1F189,
|
2180
2179
|
};
|
2181
2180
|
|
2181
|
+
/**
|
2182
|
+
* Binary search through the given list of codepoints to see if the given
|
2183
|
+
* codepoint is in the list.
|
2184
|
+
*/
|
2182
2185
|
static bool
|
2183
2186
|
pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
|
2184
2187
|
size_t start = 0;
|
@@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co
|
|
2202
2205
|
return false;
|
2203
2206
|
}
|
2204
2207
|
|
2208
|
+
/**
|
2209
|
+
* A state transition table for decoding UTF-8.
|
2210
|
+
*
|
2211
|
+
* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
2212
|
+
*
|
2213
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
2214
|
+
* of this software and associated documentation files (the "Software"), to deal
|
2215
|
+
* in the Software without restriction, including without limitation the rights
|
2216
|
+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
2217
|
+
* copies of the Software, and to permit persons to whom the Software is
|
2218
|
+
* furnished to do so, subject to the following conditions:
|
2219
|
+
*
|
2220
|
+
* The above copyright notice and this permission notice shall be included in
|
2221
|
+
* all copies or substantial portions of the Software.
|
2222
|
+
*
|
2223
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
2224
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
2225
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
2226
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
2227
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
2228
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
2229
|
+
* SOFTWARE.
|
2230
|
+
*/
|
2205
2231
|
static const uint8_t pm_utf_8_dfa[] = {
|
2206
2232
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
|
2207
2233
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
|
@@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = {
|
|
2219
2245
|
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
|
2220
2246
|
};
|
2221
2247
|
|
2248
|
+
/**
|
2249
|
+
* Given a pointer to a string and the number of bytes remaining in the string,
|
2250
|
+
* decode the next UTF-8 codepoint and return it. The number of bytes consumed
|
2251
|
+
* is returned in the width out parameter.
|
2252
|
+
*/
|
2222
2253
|
static pm_unicode_codepoint_t
|
2223
2254
|
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2224
2255
|
assert(n >= 1);
|
@@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
2253
2284
|
return width;
|
2254
2285
|
}
|
2255
2286
|
|
2287
|
+
/**
|
2288
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
2289
|
+
* alphabetical character.
|
2290
|
+
*/
|
2256
2291
|
size_t
|
2257
2292
|
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
2258
2293
|
if (*b < 0x80) {
|
@@ -2269,6 +2304,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
|
2269
2304
|
}
|
2270
2305
|
}
|
2271
2306
|
|
2307
|
+
/**
|
2308
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
2309
|
+
* alphanumeric character.
|
2310
|
+
*/
|
2272
2311
|
size_t
|
2273
2312
|
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
2274
2313
|
if (*b < 0x80) {
|
@@ -2285,6 +2324,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
|
2285
2324
|
}
|
2286
2325
|
}
|
2287
2326
|
|
2327
|
+
/**
|
2328
|
+
* Return true if the next character in the UTF-8 encoding is an uppercase
|
2329
|
+
* character.
|
2330
|
+
*/
|
2288
2331
|
bool
|
2289
2332
|
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
2290
2333
|
if (*b < 0x80) {
|
@@ -2305,6 +2348,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|
2305
2348
|
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
|
2306
2349
|
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
|
2307
2350
|
|
2351
|
+
/** UTF-8 */
|
2308
2352
|
pm_encoding_t pm_encoding_utf_8 = {
|
2309
2353
|
.name = "utf-8",
|
2310
2354
|
.char_width = pm_encoding_utf_8_char_width,
|
@@ -2314,6 +2358,7 @@ pm_encoding_t pm_encoding_utf_8 = {
|
|
2314
2358
|
.multibyte = true
|
2315
2359
|
};
|
2316
2360
|
|
2361
|
+
/** UTF8-mac */
|
2317
2362
|
pm_encoding_t pm_encoding_utf8_mac = {
|
2318
2363
|
.name = "utf8-mac",
|
2319
2364
|
.char_width = pm_encoding_utf_8_char_width,
|
data/src/enc/pm_windows_31j.c
CHANGED