prism 0.15.1 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -1
- data/Makefile +12 -0
- data/README.md +3 -1
- data/config.yml +66 -50
- data/docs/configuration.md +2 -0
- data/docs/fuzzing.md +1 -1
- data/docs/javascript.md +90 -0
- data/docs/releasing.md +27 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +856 -826
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +494 -119
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3157 -747
- data/include/prism/defines.h +40 -8
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +119 -28
- data/include/prism/node.h +38 -30
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -199
- data/include/prism/prettyprint.h +26 -0
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +102 -18
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +128 -34
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +71 -28
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +28 -3
- data/include/prism.h +229 -36
- data/lib/prism/compiler.rb +5 -5
- data/lib/prism/debug.rb +43 -13
- data/lib/prism/desugar_compiler.rb +1 -1
- data/lib/prism/dispatcher.rb +27 -26
- data/lib/prism/dsl.rb +16 -16
- data/lib/prism/ffi.rb +138 -61
- data/lib/prism/lex_compat.rb +26 -16
- data/lib/prism/mutation_compiler.rb +11 -11
- data/lib/prism/node.rb +426 -227
- data/lib/prism/node_ext.rb +23 -16
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +157 -21
- data/lib/prism/pattern.rb +14 -3
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +935 -307
- data/lib/prism/visitor.rb +9 -5
- data/lib/prism.rb +20 -2
- data/prism.gemspec +11 -2
- data/rbi/prism.rbi +7305 -0
- data/rbi/prism_static.rbi +196 -0
- data/sig/prism.rbs +4468 -0
- data/sig/prism_static.rbs +123 -0
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +54 -9
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +357 -345
- data/src/options.c +170 -0
- data/src/prettyprint.c +7697 -1643
- data/src/prism.c +1964 -1125
- data/src/regexp.c +153 -95
- data/src/serialize.c +432 -397
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +88 -23
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +52 -22
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +25 -63
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +12 -3
- data/docs/prism.png +0 -0
data/src/enc/pm_unicode.c
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
-
// Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
|
2
|
-
// decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
3
|
-
|
4
1
|
#include "prism/enc/pm_encoding.h"
|
5
2
|
|
6
3
|
typedef uint32_t pm_unicode_codepoint_t;
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
5
|
+
/**
|
6
|
+
* Each element of the following table contains a bitfield that indicates a
|
7
|
+
* piece of information about the corresponding unicode codepoint. Note that
|
8
|
+
* this table is different from other encodings where we used a lookup table
|
9
|
+
* because the indices of those tables are the byte representations, not the
|
10
|
+
* codepoints themselves.
|
11
|
+
*/
|
13
12
|
const uint8_t pm_encoding_unicode_table[256] = {
|
14
13
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
15
14
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
@@ -2179,6 +2178,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
|
|
2179
2178
|
0x1F170, 0x1F189,
|
2180
2179
|
};
|
2181
2180
|
|
2181
|
+
/**
|
2182
|
+
* Binary search through the given list of codepoints to see if the given
|
2183
|
+
* codepoint is in the list.
|
2184
|
+
*/
|
2182
2185
|
static bool
|
2183
2186
|
pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
|
2184
2187
|
size_t start = 0;
|
@@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co
|
|
2202
2205
|
return false;
|
2203
2206
|
}
|
2204
2207
|
|
2208
|
+
/**
|
2209
|
+
* A state transition table for decoding UTF-8.
|
2210
|
+
*
|
2211
|
+
* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
2212
|
+
*
|
2213
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
2214
|
+
* of this software and associated documentation files (the "Software"), to deal
|
2215
|
+
* in the Software without restriction, including without limitation the rights
|
2216
|
+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
2217
|
+
* copies of the Software, and to permit persons to whom the Software is
|
2218
|
+
* furnished to do so, subject to the following conditions:
|
2219
|
+
*
|
2220
|
+
* The above copyright notice and this permission notice shall be included in
|
2221
|
+
* all copies or substantial portions of the Software.
|
2222
|
+
*
|
2223
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
2224
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
2225
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
2226
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
2227
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
2228
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
2229
|
+
* SOFTWARE.
|
2230
|
+
*/
|
2205
2231
|
static const uint8_t pm_utf_8_dfa[] = {
|
2206
2232
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
|
2207
2233
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
|
@@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = {
|
|
2219
2245
|
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
|
2220
2246
|
};
|
2221
2247
|
|
2248
|
+
/**
|
2249
|
+
* Given a pointer to a string and the number of bytes remaining in the string,
|
2250
|
+
* decode the next UTF-8 codepoint and return it. The number of bytes consumed
|
2251
|
+
* is returned in the width out parameter.
|
2252
|
+
*/
|
2222
2253
|
static pm_unicode_codepoint_t
|
2223
2254
|
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2224
2255
|
assert(n >= 1);
|
@@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
2253
2284
|
return width;
|
2254
2285
|
}
|
2255
2286
|
|
2287
|
+
/**
|
2288
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
2289
|
+
* alphabetical character.
|
2290
|
+
*/
|
2256
2291
|
size_t
|
2257
2292
|
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
2258
2293
|
if (*b < 0x80) {
|
@@ -2269,6 +2304,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
|
2269
2304
|
}
|
2270
2305
|
}
|
2271
2306
|
|
2307
|
+
/**
|
2308
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
2309
|
+
* alphanumeric character.
|
2310
|
+
*/
|
2272
2311
|
size_t
|
2273
2312
|
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
2274
2313
|
if (*b < 0x80) {
|
@@ -2285,7 +2324,11 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
|
2285
2324
|
}
|
2286
2325
|
}
|
2287
2326
|
|
2288
|
-
|
2327
|
+
/**
|
2328
|
+
* Return true if the next character in the UTF-8 encoding is an uppercase
|
2329
|
+
* character.
|
2330
|
+
*/
|
2331
|
+
bool
|
2289
2332
|
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
2290
2333
|
if (*b < 0x80) {
|
2291
2334
|
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
|
@@ -2305,6 +2348,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|
2305
2348
|
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
|
2306
2349
|
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
|
2307
2350
|
|
2351
|
+
/** UTF-8 */
|
2308
2352
|
pm_encoding_t pm_encoding_utf_8 = {
|
2309
2353
|
.name = "utf-8",
|
2310
2354
|
.char_width = pm_encoding_utf_8_char_width,
|
@@ -2314,6 +2358,7 @@ pm_encoding_t pm_encoding_utf_8 = {
|
|
2314
2358
|
.multibyte = true
|
2315
2359
|
};
|
2316
2360
|
|
2361
|
+
/** UTF8-mac */
|
2317
2362
|
pm_encoding_t pm_encoding_utf8_mac = {
|
2318
2363
|
.name = "utf8-mac",
|
2319
2364
|
.char_width = pm_encoding_utf_8_char_width,
|
data/src/enc/pm_windows_31j.c
CHANGED