prism 0.15.1 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +35 -1
 - data/Makefile +12 -0
 - data/README.md +3 -1
 - data/config.yml +66 -50
 - data/docs/configuration.md +2 -0
 - data/docs/fuzzing.md +1 -1
 - data/docs/javascript.md +90 -0
 - data/docs/releasing.md +27 -0
 - data/docs/ruby_api.md +2 -0
 - data/docs/serialization.md +28 -29
 - data/ext/prism/api_node.c +856 -826
 - data/ext/prism/api_pack.c +20 -9
 - data/ext/prism/extension.c +494 -119
 - data/ext/prism/extension.h +1 -1
 - data/include/prism/ast.h +3157 -747
 - data/include/prism/defines.h +40 -8
 - data/include/prism/diagnostic.h +36 -3
 - data/include/prism/enc/pm_encoding.h +119 -28
 - data/include/prism/node.h +38 -30
 - data/include/prism/options.h +204 -0
 - data/include/prism/pack.h +44 -33
 - data/include/prism/parser.h +445 -199
 - data/include/prism/prettyprint.h +26 -0
 - data/include/prism/regexp.h +16 -2
 - data/include/prism/util/pm_buffer.h +102 -18
 - data/include/prism/util/pm_char.h +162 -48
 - data/include/prism/util/pm_constant_pool.h +128 -34
 - data/include/prism/util/pm_list.h +68 -38
 - data/include/prism/util/pm_memchr.h +18 -3
 - data/include/prism/util/pm_newline_list.h +71 -28
 - data/include/prism/util/pm_state_stack.h +25 -7
 - data/include/prism/util/pm_string.h +115 -27
 - data/include/prism/util/pm_string_list.h +25 -6
 - data/include/prism/util/pm_strncasecmp.h +32 -0
 - data/include/prism/util/pm_strpbrk.h +31 -17
 - data/include/prism/version.h +28 -3
 - data/include/prism.h +229 -36
 - data/lib/prism/compiler.rb +5 -5
 - data/lib/prism/debug.rb +43 -13
 - data/lib/prism/desugar_compiler.rb +1 -1
 - data/lib/prism/dispatcher.rb +27 -26
 - data/lib/prism/dsl.rb +16 -16
 - data/lib/prism/ffi.rb +138 -61
 - data/lib/prism/lex_compat.rb +26 -16
 - data/lib/prism/mutation_compiler.rb +11 -11
 - data/lib/prism/node.rb +426 -227
 - data/lib/prism/node_ext.rb +23 -16
 - data/lib/prism/node_inspector.rb +1 -1
 - data/lib/prism/pack.rb +79 -40
 - data/lib/prism/parse_result/comments.rb +7 -2
 - data/lib/prism/parse_result/newlines.rb +4 -0
 - data/lib/prism/parse_result.rb +157 -21
 - data/lib/prism/pattern.rb +14 -3
 - data/lib/prism/ripper_compat.rb +28 -10
 - data/lib/prism/serialize.rb +935 -307
 - data/lib/prism/visitor.rb +9 -5
 - data/lib/prism.rb +20 -2
 - data/prism.gemspec +11 -2
 - data/rbi/prism.rbi +7305 -0
 - data/rbi/prism_static.rbi +196 -0
 - data/sig/prism.rbs +4468 -0
 - data/sig/prism_static.rbs +123 -0
 - data/src/diagnostic.c +56 -53
 - data/src/enc/pm_big5.c +1 -0
 - data/src/enc/pm_euc_jp.c +1 -0
 - data/src/enc/pm_gbk.c +1 -0
 - data/src/enc/pm_shift_jis.c +1 -0
 - data/src/enc/pm_tables.c +316 -80
 - data/src/enc/pm_unicode.c +54 -9
 - data/src/enc/pm_windows_31j.c +1 -0
 - data/src/node.c +357 -345
 - data/src/options.c +170 -0
 - data/src/prettyprint.c +7697 -1643
 - data/src/prism.c +1964 -1125
 - data/src/regexp.c +153 -95
 - data/src/serialize.c +432 -397
 - data/src/token_type.c +3 -1
 - data/src/util/pm_buffer.c +88 -23
 - data/src/util/pm_char.c +103 -57
 - data/src/util/pm_constant_pool.c +52 -22
 - data/src/util/pm_list.c +12 -4
 - data/src/util/pm_memchr.c +5 -3
 - data/src/util/pm_newline_list.c +25 -63
 - data/src/util/pm_state_stack.c +9 -3
 - data/src/util/pm_string.c +95 -85
 - data/src/util/pm_string_list.c +14 -15
 - data/src/util/pm_strncasecmp.c +10 -3
 - data/src/util/pm_strpbrk.c +25 -19
 - metadata +12 -3
 - data/docs/prism.png +0 -0
 
    
        data/src/enc/pm_unicode.c
    CHANGED
    
    | 
         @@ -1,15 +1,14 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            // Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
         
     | 
| 
       2 
     | 
    
         
            -
            // decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
1 
     | 
    
         
             
            #include "prism/enc/pm_encoding.h"
         
     | 
| 
       5 
2 
     | 
    
         | 
| 
       6 
3 
     | 
    
         
             
            typedef uint32_t pm_unicode_codepoint_t;
         
     | 
| 
       7 
4 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
      
 5 
     | 
    
         
            +
            /**
         
     | 
| 
      
 6 
     | 
    
         
            +
             * Each element of the following table contains a bitfield that indicates a
         
     | 
| 
      
 7 
     | 
    
         
            +
             * piece of information about the corresponding unicode codepoint. Note that
         
     | 
| 
      
 8 
     | 
    
         
            +
             * this table is different from other encodings where we used a lookup table
         
     | 
| 
      
 9 
     | 
    
         
            +
             * because the indices of those tables are the byte representations, not the
         
     | 
| 
      
 10 
     | 
    
         
            +
             * codepoints themselves.
         
     | 
| 
      
 11 
     | 
    
         
            +
             */
         
     | 
| 
       13 
12 
     | 
    
         
             
            const uint8_t pm_encoding_unicode_table[256] = {
         
     | 
| 
       14 
13 
     | 
    
         
             
            //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         
     | 
| 
       15 
14 
     | 
    
         
             
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
         
     | 
| 
         @@ -2179,6 +2178,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C 
     | 
|
| 
       2179 
2178 
     | 
    
         
             
                0x1F170, 0x1F189,
         
     | 
| 
       2180 
2179 
     | 
    
         
             
            };
         
     | 
| 
       2181 
2180 
     | 
    
         | 
| 
      
 2181 
     | 
    
         
            +
            /**
         
     | 
| 
      
 2182 
     | 
    
         
            +
             * Binary search through the given list of codepoints to see if the given
         
     | 
| 
      
 2183 
     | 
    
         
            +
             * codepoint is in the list.
         
     | 
| 
      
 2184 
     | 
    
         
            +
             */
         
     | 
| 
       2182 
2185 
     | 
    
         
             
            static bool
         
     | 
| 
       2183 
2186 
     | 
    
         
             
            pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
         
     | 
| 
       2184 
2187 
     | 
    
         
             
                size_t start = 0;
         
     | 
| 
         @@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co 
     | 
|
| 
       2202 
2205 
     | 
    
         
             
                return false;
         
     | 
| 
       2203 
2206 
     | 
    
         
             
            }
         
     | 
| 
       2204 
2207 
     | 
    
         | 
| 
      
 2208 
     | 
    
         
            +
            /**
         
     | 
| 
      
 2209 
     | 
    
         
            +
             * A state transition table for decoding UTF-8.
         
     | 
| 
      
 2210 
     | 
    
         
            +
             *
         
     | 
| 
      
 2211 
     | 
    
         
            +
             * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
         
     | 
| 
      
 2212 
     | 
    
         
            +
             *
         
     | 
| 
      
 2213 
     | 
    
         
            +
             * Permission is hereby granted, free of charge, to any person obtaining a copy
         
     | 
| 
      
 2214 
     | 
    
         
            +
             * of this software and associated documentation files (the "Software"), to deal
         
     | 
| 
      
 2215 
     | 
    
         
            +
             * in the Software without restriction, including without limitation the rights
         
     | 
| 
      
 2216 
     | 
    
         
            +
             * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         
     | 
| 
      
 2217 
     | 
    
         
            +
             * copies of the Software, and to permit persons to whom the Software is
         
     | 
| 
      
 2218 
     | 
    
         
            +
             * furnished to do so, subject to the following conditions:
         
     | 
| 
      
 2219 
     | 
    
         
            +
             *
         
     | 
| 
      
 2220 
     | 
    
         
            +
             * The above copyright notice and this permission notice shall be included in
         
     | 
| 
      
 2221 
     | 
    
         
            +
             * all copies or substantial portions of the Software.
         
     | 
| 
      
 2222 
     | 
    
         
            +
             *
         
     | 
| 
      
 2223 
     | 
    
         
            +
             * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         
     | 
| 
      
 2224 
     | 
    
         
            +
             * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         
     | 
| 
      
 2225 
     | 
    
         
            +
             * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         
     | 
| 
      
 2226 
     | 
    
         
            +
             * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         
     | 
| 
      
 2227 
     | 
    
         
            +
             * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         
     | 
| 
      
 2228 
     | 
    
         
            +
             * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         
     | 
| 
      
 2229 
     | 
    
         
            +
             * SOFTWARE.
         
     | 
| 
      
 2230 
     | 
    
         
            +
             */
         
     | 
| 
       2205 
2231 
     | 
    
         
             
            static const uint8_t pm_utf_8_dfa[] = {
         
     | 
| 
       2206 
2232 
     | 
    
         
             
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
         
     | 
| 
       2207 
2233 
     | 
    
         
             
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
         
     | 
| 
         @@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = { 
     | 
|
| 
       2219 
2245 
     | 
    
         
             
                1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
         
     | 
| 
       2220 
2246 
     | 
    
         
             
            };
         
     | 
| 
       2221 
2247 
     | 
    
         | 
| 
      
 2248 
     | 
    
         
            +
            /**
         
     | 
| 
      
 2249 
     | 
    
         
            +
             * Given a pointer to a string and the number of bytes remaining in the string,
         
     | 
| 
      
 2250 
     | 
    
         
            +
             * decode the next UTF-8 codepoint and return it. The number of bytes consumed
         
     | 
| 
      
 2251 
     | 
    
         
            +
             * is returned in the width out parameter.
         
     | 
| 
      
 2252 
     | 
    
         
            +
             */
         
     | 
| 
       2222 
2253 
     | 
    
         
             
            static pm_unicode_codepoint_t
         
     | 
| 
       2223 
2254 
     | 
    
         
             
            pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
         
     | 
| 
       2224 
2255 
     | 
    
         
             
                assert(n >= 1);
         
     | 
| 
         @@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) { 
     | 
|
| 
       2253 
2284 
     | 
    
         
             
                return width;
         
     | 
| 
       2254 
2285 
     | 
    
         
             
            }
         
     | 
| 
       2255 
2286 
     | 
    
         | 
| 
      
 2287 
     | 
    
         
            +
            /**
         
     | 
| 
      
 2288 
     | 
    
         
            +
             * Return the size of the next character in the UTF-8 encoding if it is an
         
     | 
| 
      
 2289 
     | 
    
         
            +
             * alphabetical character.
         
     | 
| 
      
 2290 
     | 
    
         
            +
             */
         
     | 
| 
       2256 
2291 
     | 
    
         
             
            size_t
         
     | 
| 
       2257 
2292 
     | 
    
         
             
            pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
         
     | 
| 
       2258 
2293 
     | 
    
         
             
                if (*b < 0x80) {
         
     | 
| 
         @@ -2269,6 +2304,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) { 
     | 
|
| 
       2269 
2304 
     | 
    
         
             
                }
         
     | 
| 
       2270 
2305 
     | 
    
         
             
            }
         
     | 
| 
       2271 
2306 
     | 
    
         | 
| 
      
 2307 
     | 
    
         
            +
            /**
         
     | 
| 
      
 2308 
     | 
    
         
            +
             * Return the size of the next character in the UTF-8 encoding if it is an
         
     | 
| 
      
 2309 
     | 
    
         
            +
             * alphanumeric character.
         
     | 
| 
      
 2310 
     | 
    
         
            +
             */
         
     | 
| 
       2272 
2311 
     | 
    
         
             
            size_t
         
     | 
| 
       2273 
2312 
     | 
    
         
             
            pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
         
     | 
| 
       2274 
2313 
     | 
    
         
             
                if (*b < 0x80) {
         
     | 
| 
         @@ -2285,7 +2324,11 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) { 
     | 
|
| 
       2285 
2324 
     | 
    
         
             
                }
         
     | 
| 
       2286 
2325 
     | 
    
         
             
            }
         
     | 
| 
       2287 
2326 
     | 
    
         | 
| 
       2288 
     | 
    
         
            -
             
     | 
| 
      
 2327 
     | 
    
         
            +
            /**
         
     | 
| 
      
 2328 
     | 
    
         
            +
             * Return true if the next character in the UTF-8 encoding is an uppercase
         
     | 
| 
      
 2329 
     | 
    
         
            +
             * character.
         
     | 
| 
      
 2330 
     | 
    
         
            +
             */
         
     | 
| 
      
 2331 
     | 
    
         
            +
            bool
         
     | 
| 
       2289 
2332 
     | 
    
         
             
            pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
         
     | 
| 
       2290 
2333 
     | 
    
         
             
                if (*b < 0x80) {
         
     | 
| 
       2291 
2334 
     | 
    
         
             
                    return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
         
     | 
| 
         @@ -2305,6 +2348,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) { 
     | 
|
| 
       2305 
2348 
     | 
    
         
             
            #undef UNICODE_ALNUM_CODEPOINTS_LENGTH
         
     | 
| 
       2306 
2349 
     | 
    
         
             
            #undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
         
     | 
| 
       2307 
2350 
     | 
    
         | 
| 
      
 2351 
     | 
    
         
            +
            /** UTF-8 */
         
     | 
| 
       2308 
2352 
     | 
    
         
             
            pm_encoding_t pm_encoding_utf_8 = {
         
     | 
| 
       2309 
2353 
     | 
    
         
             
                .name = "utf-8",
         
     | 
| 
       2310 
2354 
     | 
    
         
             
                .char_width = pm_encoding_utf_8_char_width,
         
     | 
| 
         @@ -2314,6 +2358,7 @@ pm_encoding_t pm_encoding_utf_8 = { 
     | 
|
| 
       2314 
2358 
     | 
    
         
             
                .multibyte = true
         
     | 
| 
       2315 
2359 
     | 
    
         
             
            };
         
     | 
| 
       2316 
2360 
     | 
    
         | 
| 
      
 2361 
     | 
    
         
            +
            /** UTF8-mac */
         
     | 
| 
       2317 
2362 
     | 
    
         
             
            pm_encoding_t pm_encoding_utf8_mac = {
         
     | 
| 
       2318 
2363 
     | 
    
         
             
                .name = "utf8-mac",
         
     | 
| 
       2319 
2364 
     | 
    
         
             
                .char_width = pm_encoding_utf_8_char_width,
         
     | 
    
        data/src/enc/pm_windows_31j.c
    CHANGED