prism 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
data/src/enc/pm_unicode.c
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
-
// Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
|
2
|
-
// decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
3
|
-
|
4
1
|
#include "prism/enc/pm_encoding.h"
|
5
2
|
|
6
3
|
typedef uint32_t pm_unicode_codepoint_t;
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
5
|
+
/**
|
6
|
+
* Each element of the following table contains a bitfield that indicates a
|
7
|
+
* piece of information about the corresponding unicode codepoint. Note that
|
8
|
+
* this table is different from other encodings where we used a lookup table
|
9
|
+
* because the indices of those tables are the byte representations, not the
|
10
|
+
* codepoints themselves.
|
11
|
+
*/
|
13
12
|
const uint8_t pm_encoding_unicode_table[256] = {
|
14
13
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
15
14
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -2179,6 +2178,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
|
|
2179
2178
|
0x1F170, 0x1F189,
|
2180
2179
|
};
|
2181
2180
|
|
2181
|
+
/**
|
2182
|
+
* Binary search through the given list of codepoints to see if the given
|
2183
|
+
* codepoint is in the list.
|
2184
|
+
*/
|
2182
2185
|
static bool
|
2183
2186
|
pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
|
2184
2187
|
size_t start = 0;
|
@@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co
|
|
2202
2205
|
return false;
|
2203
2206
|
}
|
2204
2207
|
|
2208
|
+
/**
|
2209
|
+
* A state transition table for decoding UTF-8.
|
2210
|
+
*
|
2211
|
+
* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
2212
|
+
*
|
2213
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
2214
|
+
* of this software and associated documentation files (the "Software"), to deal
|
2215
|
+
* in the Software without restriction, including without limitation the rights
|
2216
|
+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
2217
|
+
* copies of the Software, and to permit persons to whom the Software is
|
2218
|
+
* furnished to do so, subject to the following conditions:
|
2219
|
+
*
|
2220
|
+
* The above copyright notice and this permission notice shall be included in
|
2221
|
+
* all copies or substantial portions of the Software.
|
2222
|
+
*
|
2223
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
2224
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
2225
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
2226
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
2227
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
2228
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
2229
|
+
* SOFTWARE.
|
2230
|
+
*/
|
2205
2231
|
static const uint8_t pm_utf_8_dfa[] = {
|
2206
2232
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
|
2207
2233
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
|
@@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = {
|
|
2219
2245
|
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
|
2220
2246
|
};
|
2221
2247
|
|
2248
|
+
/**
|
2249
|
+
* Given a pointer to a string and the number of bytes remaining in the string,
|
2250
|
+
* decode the next UTF-8 codepoint and return it. The number of bytes consumed
|
2251
|
+
* is returned in the width out parameter.
|
2252
|
+
*/
|
2222
2253
|
static pm_unicode_codepoint_t
|
2223
2254
|
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2224
2255
|
assert(n >= 1);
|
@@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
2253
2284
|
return width;
|
2254
2285
|
}
|
2255
2286
|
|
2287
|
+
/**
|
2288
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
2289
|
+
* alphabetical character.
|
2290
|
+
*/
|
2256
2291
|
size_t
|
2257
2292
|
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
2258
2293
|
if (*b < 0x80) {
|
@@ -2269,6 +2304,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
|
2269
2304
|
}
|
2270
2305
|
}
|
2271
2306
|
|
2307
|
+
/**
|
2308
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
2309
|
+
* alphanumeric character.
|
2310
|
+
*/
|
2272
2311
|
size_t
|
2273
2312
|
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
2274
2313
|
if (*b < 0x80) {
|
@@ -2285,6 +2324,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
|
2285
2324
|
}
|
2286
2325
|
}
|
2287
2326
|
|
2327
|
+
/**
|
2328
|
+
* Return true if the next character in the UTF-8 encoding is an uppercase
|
2329
|
+
* character.
|
2330
|
+
*/
|
2288
2331
|
bool
|
2289
2332
|
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
2290
2333
|
if (*b < 0x80) {
|
@@ -2305,6 +2348,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|
2305
2348
|
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
|
2306
2349
|
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
|
2307
2350
|
|
2351
|
+
/** UTF-8 */
|
2308
2352
|
pm_encoding_t pm_encoding_utf_8 = {
|
2309
2353
|
.name = "utf-8",
|
2310
2354
|
.char_width = pm_encoding_utf_8_char_width,
|
@@ -2314,6 +2358,7 @@ pm_encoding_t pm_encoding_utf_8 = {
|
|
2314
2358
|
.multibyte = true
|
2315
2359
|
};
|
2316
2360
|
|
2361
|
+
/** UTF8-mac */
|
2317
2362
|
pm_encoding_t pm_encoding_utf8_mac = {
|
2318
2363
|
.name = "utf8-mac",
|
2319
2364
|
.char_width = pm_encoding_utf_8_char_width,
|
data/src/enc/pm_windows_31j.c
CHANGED