prism 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +16 -1
 - data/Makefile +6 -0
 - data/README.md +1 -1
 - data/config.yml +50 -35
 - data/docs/fuzzing.md +1 -1
 - data/docs/serialization.md +28 -29
 - data/ext/prism/api_node.c +802 -770
 - data/ext/prism/api_pack.c +20 -9
 - data/ext/prism/extension.c +464 -162
 - data/ext/prism/extension.h +1 -1
 - data/include/prism/ast.h +3173 -763
 - data/include/prism/defines.h +32 -9
 - data/include/prism/diagnostic.h +36 -3
 - data/include/prism/enc/pm_encoding.h +118 -28
 - data/include/prism/node.h +38 -13
 - data/include/prism/options.h +204 -0
 - data/include/prism/pack.h +44 -33
 - data/include/prism/parser.h +445 -200
 - data/include/prism/prettyprint.h +12 -1
 - data/include/prism/regexp.h +16 -2
 - data/include/prism/util/pm_buffer.h +94 -16
 - data/include/prism/util/pm_char.h +162 -48
 - data/include/prism/util/pm_constant_pool.h +126 -32
 - data/include/prism/util/pm_list.h +68 -38
 - data/include/prism/util/pm_memchr.h +18 -3
 - data/include/prism/util/pm_newline_list.h +70 -27
 - data/include/prism/util/pm_state_stack.h +25 -7
 - data/include/prism/util/pm_string.h +115 -27
 - data/include/prism/util/pm_string_list.h +25 -6
 - data/include/prism/util/pm_strncasecmp.h +32 -0
 - data/include/prism/util/pm_strpbrk.h +31 -17
 - data/include/prism/version.h +27 -2
 - data/include/prism.h +224 -31
 - data/lib/prism/compiler.rb +6 -3
 - data/lib/prism/debug.rb +23 -7
 - data/lib/prism/dispatcher.rb +33 -18
 - data/lib/prism/dsl.rb +10 -5
 - data/lib/prism/ffi.rb +132 -80
 - data/lib/prism/lex_compat.rb +25 -15
 - data/lib/prism/mutation_compiler.rb +10 -5
 - data/lib/prism/node.rb +370 -135
 - data/lib/prism/node_ext.rb +1 -1
 - data/lib/prism/node_inspector.rb +1 -1
 - data/lib/prism/pack.rb +79 -40
 - data/lib/prism/parse_result/comments.rb +7 -2
 - data/lib/prism/parse_result/newlines.rb +4 -0
 - data/lib/prism/parse_result.rb +150 -30
 - data/lib/prism/pattern.rb +11 -0
 - data/lib/prism/ripper_compat.rb +28 -10
 - data/lib/prism/serialize.rb +86 -54
 - data/lib/prism/visitor.rb +10 -3
 - data/lib/prism.rb +20 -2
 - data/prism.gemspec +4 -2
 - data/rbi/prism.rbi +104 -60
 - data/rbi/prism_static.rbi +16 -2
 - data/sig/prism.rbs +72 -43
 - data/sig/prism_static.rbs +14 -1
 - data/src/diagnostic.c +56 -53
 - data/src/enc/pm_big5.c +1 -0
 - data/src/enc/pm_euc_jp.c +1 -0
 - data/src/enc/pm_gbk.c +1 -0
 - data/src/enc/pm_shift_jis.c +1 -0
 - data/src/enc/pm_tables.c +316 -80
 - data/src/enc/pm_unicode.c +53 -8
 - data/src/enc/pm_windows_31j.c +1 -0
 - data/src/node.c +334 -321
 - data/src/options.c +170 -0
 - data/src/prettyprint.c +74 -47
 - data/src/prism.c +1642 -856
 - data/src/regexp.c +151 -95
 - data/src/serialize.c +44 -20
 - data/src/token_type.c +3 -1
 - data/src/util/pm_buffer.c +45 -15
 - data/src/util/pm_char.c +103 -57
 - data/src/util/pm_constant_pool.c +51 -21
 - data/src/util/pm_list.c +12 -4
 - data/src/util/pm_memchr.c +5 -3
 - data/src/util/pm_newline_list.c +20 -12
 - data/src/util/pm_state_stack.c +9 -3
 - data/src/util/pm_string.c +95 -85
 - data/src/util/pm_string_list.c +14 -15
 - data/src/util/pm_strncasecmp.c +10 -3
 - data/src/util/pm_strpbrk.c +25 -19
 - metadata +5 -3
 - data/docs/prism.png +0 -0
 
    
        data/src/regexp.c
    CHANGED
    
    | 
         @@ -1,16 +1,31 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            #include "prism/regexp.h"
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
      
 3 
     | 
    
         
            +
            /**
         
     | 
| 
      
 4 
     | 
    
         
            +
             * This is the parser that is going to handle parsing regular expressions.
         
     | 
| 
      
 5 
     | 
    
         
            +
             */
         
     | 
| 
       4 
6 
     | 
    
         
             
            typedef struct {
         
     | 
| 
      
 7 
     | 
    
         
            +
                /** A pointer to the start of the source that we are parsing. */
         
     | 
| 
       5 
8 
     | 
    
         
             
                const uint8_t *start;
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
                /** A pointer to the current position in the source. */
         
     | 
| 
       6 
11 
     | 
    
         
             
                const uint8_t *cursor;
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                /** A pointer to the end of the source that we are parsing. */
         
     | 
| 
       7 
14 
     | 
    
         
             
                const uint8_t *end;
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                /** A list of named captures that we've found. */
         
     | 
| 
       8 
17 
     | 
    
         
             
                pm_string_list_t *named_captures;
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                /** Whether the encoding has changed from the default. */
         
     | 
| 
       9 
20 
     | 
    
         
             
                bool encoding_changed;
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
                /** The encoding of the source. */
         
     | 
| 
       10 
23 
     | 
    
         
             
                pm_encoding_t *encoding;
         
     | 
| 
       11 
24 
     | 
    
         
             
            } pm_regexp_parser_t;
         
     | 
| 
       12 
25 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
      
 26 
     | 
    
         
            +
            /**
         
     | 
| 
      
 27 
     | 
    
         
            +
             * This initializes a new parser with the given source.
         
     | 
| 
      
 28 
     | 
    
         
            +
             */
         
     | 
| 
       14 
29 
     | 
    
         
             
            static void
         
     | 
| 
       15 
30 
     | 
    
         
             
            pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
         
     | 
| 
       16 
31 
     | 
    
         
             
                *parser = (pm_regexp_parser_t) {
         
     | 
| 
         @@ -23,7 +38,9 @@ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const ui 
     | 
|
| 
       23 
38 
     | 
    
         
             
                };
         
     | 
| 
       24 
39 
     | 
    
         
             
            }
         
     | 
| 
       25 
40 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
      
 41 
     | 
    
         
            +
            /**
         
     | 
| 
      
 42 
     | 
    
         
            +
             * This appends a new string to the list of named captures.
         
     | 
| 
      
 43 
     | 
    
         
            +
             */
         
     | 
| 
       27 
44 
     | 
    
         
             
            static void
         
     | 
| 
       28 
45 
     | 
    
         
             
            pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
         
     | 
| 
       29 
46 
     | 
    
         
             
                pm_string_t string;
         
     | 
| 
         @@ -32,13 +49,17 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, 
     | 
|
| 
       32 
49 
     | 
    
         
             
                pm_string_free(&string);
         
     | 
| 
       33 
50 
     | 
    
         
             
            }
         
     | 
| 
       34 
51 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
      
 52 
     | 
    
         
            +
            /**
         
     | 
| 
      
 53 
     | 
    
         
            +
             * Returns true if the next character is the end of the source.
         
     | 
| 
      
 54 
     | 
    
         
            +
             */
         
     | 
| 
       36 
55 
     | 
    
         
             
            static inline bool
         
     | 
| 
       37 
56 
     | 
    
         
             
            pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
         
     | 
| 
       38 
57 
     | 
    
         
             
                return parser->cursor >= parser->end;
         
     | 
| 
       39 
58 
     | 
    
         
             
            }
         
     | 
| 
       40 
59 
     | 
    
         | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
      
 60 
     | 
    
         
            +
            /**
         
     | 
| 
      
 61 
     | 
    
         
            +
             * Optionally accept a char and consume it if it exists.
         
     | 
| 
      
 62 
     | 
    
         
            +
             */
         
     | 
| 
       42 
63 
     | 
    
         
             
            static inline bool
         
     | 
| 
       43 
64 
     | 
    
         
             
            pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
         
     | 
| 
       44 
65 
     | 
    
         
             
                if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
         
     | 
| 
         @@ -48,7 +69,9 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) { 
     | 
|
| 
       48 
69 
     | 
    
         
             
                return false;
         
     | 
| 
       49 
70 
     | 
    
         
             
            }
         
     | 
| 
       50 
71 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
      
 72 
     | 
    
         
            +
            /**
         
     | 
| 
      
 73 
     | 
    
         
            +
             * Expect a character to be present and consume it.
         
     | 
| 
      
 74 
     | 
    
         
            +
             */
         
     | 
| 
       52 
75 
     | 
    
         
             
            static inline bool
         
     | 
| 
       53 
76 
     | 
    
         
             
            pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
         
     | 
| 
       54 
77 
     | 
    
         
             
                if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
         
     | 
| 
         @@ -58,7 +81,9 @@ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) { 
     | 
|
| 
       58 
81 
     | 
    
         
             
                return false;
         
     | 
| 
       59 
82 
     | 
    
         
             
            }
         
     | 
| 
       60 
83 
     | 
    
         | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
      
 84 
     | 
    
         
            +
            /**
         
     | 
| 
      
 85 
     | 
    
         
            +
             * This advances the current token to the next instance of the given character.
         
     | 
| 
      
 86 
     | 
    
         
            +
             */
         
     | 
| 
       62 
87 
     | 
    
         
             
            static bool
         
     | 
| 
       63 
88 
     | 
    
         
             
            pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
         
     | 
| 
       64 
89 
     | 
    
         
             
                if (pm_regexp_char_is_eof(parser)) {
         
     | 
| 
         @@ -74,37 +99,39 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) { 
     | 
|
| 
       74 
99 
     | 
    
         
             
                return true;
         
     | 
| 
       75 
100 
     | 
    
         
             
            }
         
     | 
| 
       76 
101 
     | 
    
         | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
             
     | 
| 
       95 
     | 
    
         
            -
             
     | 
| 
       96 
     | 
    
         
            -
             
     | 
| 
       97 
     | 
    
         
            -
             
     | 
| 
       98 
     | 
    
         
            -
             
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
             
     | 
| 
       105 
     | 
    
         
            -
             
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
       107 
     | 
    
         
            -
             
     | 
| 
      
 102 
     | 
    
         
            +
            /**
         
     | 
| 
      
 103 
     | 
    
         
            +
             * Range quantifiers are a special class of quantifiers that look like
         
     | 
| 
      
 104 
     | 
    
         
            +
             *
         
     | 
| 
      
 105 
     | 
    
         
            +
             * * {digit}
         
     | 
| 
      
 106 
     | 
    
         
            +
             * * {digit,}
         
     | 
| 
      
 107 
     | 
    
         
            +
             * * {digit,digit}
         
     | 
| 
      
 108 
     | 
    
         
            +
             * * {,digit}
         
     | 
| 
      
 109 
     | 
    
         
            +
             *
         
     | 
| 
      
 110 
     | 
    
         
            +
             * Unfortunately, if there are any spaces in between, then this just becomes a
         
     | 
| 
      
 111 
     | 
    
         
            +
             * regular character match expression and we have to backtrack. So when this
         
     | 
| 
      
 112 
     | 
    
         
            +
             * function first starts running, we'll create a "save" point and then attempt
         
     | 
| 
      
 113 
     | 
    
         
            +
             * to parse the quantifier. If it fails, we'll restore the save point and
         
     | 
| 
      
 114 
     | 
    
         
            +
             * return.
         
     | 
| 
      
 115 
     | 
    
         
            +
             *
         
     | 
| 
      
 116 
     | 
    
         
            +
             * To properly track everything, we're going to build a little state machine.
         
     | 
| 
      
 117 
     | 
    
         
            +
             * It looks something like the following:
         
     | 
| 
      
 118 
     | 
    
         
            +
             *
         
     | 
| 
      
 119 
     | 
    
         
            +
             *                  ┌───────┐                 ┌─────────┐ ────────────┐
         
     | 
| 
      
 120 
     | 
    
         
            +
             * ──── lbrace ───> │ start │ ──── digit ───> │ minimum │             │
         
     | 
| 
      
 121 
     | 
    
         
            +
             *                  └───────┘                 └─────────┘ <─── digit ─┘
         
     | 
| 
      
 122 
     | 
    
         
            +
             *                      │                       │    │
         
     | 
| 
      
 123 
     | 
    
         
            +
             *   ┌───────┐          │                       │  rbrace
         
     | 
| 
      
 124 
     | 
    
         
            +
             *   │ comma │ <───── comma  ┌──── comma ───────┘    │
         
     | 
| 
      
 125 
     | 
    
         
            +
             *   └───────┘               V                       V
         
     | 
| 
      
 126 
     | 
    
         
            +
             *      │             ┌─────────┐               ┌─────────┐
         
     | 
| 
      
 127 
     | 
    
         
            +
             *      └── digit ──> │ maximum │ ── rbrace ──> │| final |│
         
     | 
| 
      
 128 
     | 
    
         
            +
             *                    └─────────┘               └─────────┘
         
     | 
| 
      
 129 
     | 
    
         
            +
             *                    │         ^
         
     | 
| 
      
 130 
     | 
    
         
            +
             *                    └─ digit ─┘
         
     | 
| 
      
 131 
     | 
    
         
            +
             *
         
     | 
| 
      
 132 
     | 
    
         
            +
             * Note that by the time we've hit this function, the lbrace has already been
         
     | 
| 
      
 133 
     | 
    
         
            +
             * consumed so we're in the start state.
         
     | 
| 
      
 134 
     | 
    
         
            +
             */
         
     | 
| 
       108 
135 
     | 
    
         
             
            static bool
         
     | 
| 
       109 
136 
     | 
    
         
             
            pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
         
     | 
| 
       110 
137 
     | 
    
         
             
                const uint8_t *savepoint = parser->cursor;
         
     | 
| 
         @@ -180,12 +207,14 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) { 
     | 
|
| 
       180 
207 
     | 
    
         
             
                return true;
         
     | 
| 
       181 
208 
     | 
    
         
             
            }
         
     | 
| 
       182 
209 
     | 
    
         | 
| 
       183 
     | 
    
         
            -
             
     | 
| 
       184 
     | 
    
         
            -
             
     | 
| 
       185 
     | 
    
         
            -
             
     | 
| 
       186 
     | 
    
         
            -
             
     | 
| 
       187 
     | 
    
         
            -
             
     | 
| 
       188 
     | 
    
         
            -
             
     | 
| 
      
 210 
     | 
    
         
            +
            /**
         
     | 
| 
      
 211 
     | 
    
         
            +
             * quantifier : star-quantifier
         
     | 
| 
      
 212 
     | 
    
         
            +
             *            | plus-quantifier
         
     | 
| 
      
 213 
     | 
    
         
            +
             *            | optional-quantifier
         
     | 
| 
      
 214 
     | 
    
         
            +
             *            | range-quantifier
         
     | 
| 
      
 215 
     | 
    
         
            +
             *            | <empty>
         
     | 
| 
      
 216 
     | 
    
         
            +
             *            ;
         
     | 
| 
      
 217 
     | 
    
         
            +
             */
         
     | 
| 
       189 
218 
     | 
    
         
             
            static bool
         
     | 
| 
       190 
219 
     | 
    
         
             
            pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
         
     | 
| 
       191 
220 
     | 
    
         
             
                if (pm_regexp_char_is_eof(parser)) return true;
         
     | 
| 
         @@ -205,8 +234,10 @@ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) { 
     | 
|
| 
       205 
234 
     | 
    
         
             
                }
         
     | 
| 
       206 
235 
     | 
    
         
             
            }
         
     | 
| 
       207 
236 
     | 
    
         | 
| 
       208 
     | 
    
         
            -
             
     | 
| 
       209 
     | 
    
         
            -
             
     | 
| 
      
 237 
     | 
    
         
            +
            /**
         
     | 
| 
      
 238 
     | 
    
         
            +
             * match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
         
     | 
| 
      
 239 
     | 
    
         
            +
             *                   ;
         
     | 
| 
      
 240 
     | 
    
         
            +
             */
         
     | 
| 
       210 
241 
     | 
    
         
             
            static bool
         
     | 
| 
       211 
242 
     | 
    
         
             
            pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
         
     | 
| 
       212 
243 
     | 
    
         
             
                if (!pm_regexp_char_expect(parser, ':')) {
         
     | 
| 
         @@ -226,8 +257,10 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) { 
     | 
|
| 
       226 
257 
     | 
    
         
             
            static bool
         
     | 
| 
       227 
258 
     | 
    
         
             
            pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
         
     | 
| 
       228 
259 
     | 
    
         | 
| 
       229 
     | 
    
         
            -
             
     | 
| 
       230 
     | 
    
         
            -
             
     | 
| 
      
 260 
     | 
    
         
            +
            /**
         
     | 
| 
      
 261 
     | 
    
         
            +
             * match-char-set : '[' '^'? (match-range | match-char)* ']'
         
     | 
| 
      
 262 
     | 
    
         
            +
             *                ;
         
     | 
| 
      
 263 
     | 
    
         
            +
             */
         
     | 
| 
       231 
264 
     | 
    
         
             
            static bool
         
     | 
| 
       232 
265 
     | 
    
         
             
            pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
         
     | 
| 
       233 
266 
     | 
    
         
             
                pm_regexp_char_accept(parser, '^');
         
     | 
| 
         @@ -251,7 +284,9 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) { 
     | 
|
| 
       251 
284 
     | 
    
         
             
                return pm_regexp_char_expect(parser, ']');
         
     | 
| 
       252 
285 
     | 
    
         
             
            }
         
     | 
| 
       253 
286 
     | 
    
         | 
| 
       254 
     | 
    
         
            -
             
     | 
| 
      
 287 
     | 
    
         
            +
            /**
         
     | 
| 
      
 288 
     | 
    
         
            +
             * A left bracket can either mean a POSIX class or a character set.
         
     | 
| 
      
 289 
     | 
    
         
            +
             */
         
     | 
| 
       255 
290 
     | 
    
         
             
            static bool
         
     | 
| 
       256 
291 
     | 
    
         
             
            pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
         
     | 
| 
       257 
292 
     | 
    
         
             
                const uint8_t *reset = parser->cursor;
         
     | 
| 
         @@ -271,8 +306,10 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) { 
     | 
|
| 
       271 
306 
     | 
    
         
             
            static bool
         
     | 
| 
       272 
307 
     | 
    
         
             
            pm_regexp_parse_expression(pm_regexp_parser_t *parser);
         
     | 
| 
       273 
308 
     | 
    
         | 
| 
       274 
     | 
    
         
            -
             
     | 
| 
       275 
     | 
    
         
            -
             
     | 
| 
      
 309 
     | 
    
         
            +
            /**
         
     | 
| 
      
 310 
     | 
    
         
            +
             * These are the states of the options that are configurable on the regular
         
     | 
| 
      
 311 
     | 
    
         
            +
             * expression (or from within a group).
         
     | 
| 
      
 312 
     | 
    
         
            +
             */
         
     | 
| 
       276 
313 
     | 
    
         
             
            typedef enum {
         
     | 
| 
       277 
314 
     | 
    
         
             
                PM_REGEXP_OPTION_STATE_INVALID,
         
     | 
| 
       278 
315 
     | 
    
         
             
                PM_REGEXP_OPTION_STATE_TOGGLEABLE,
         
     | 
| 
         @@ -283,16 +320,22 @@ typedef enum { 
     | 
|
| 
       283 
320 
     | 
    
         | 
| 
       284 
321 
     | 
    
         
             
            // These are the options that are configurable on the regular expression (or
         
     | 
| 
       285 
322 
     | 
    
         
             
            // from within a group).
         
     | 
| 
      
 323 
     | 
    
         
            +
             
     | 
| 
       286 
324 
     | 
    
         
             
            #define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
         
     | 
| 
       287 
325 
     | 
    
         
             
            #define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
         
     | 
| 
       288 
326 
     | 
    
         
             
            #define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
         
     | 
| 
       289 
327 
     | 
    
         | 
| 
       290 
     | 
    
         
            -
             
     | 
| 
      
 328 
     | 
    
         
            +
            /**
         
     | 
| 
      
 329 
     | 
    
         
            +
             * This is the set of options that are configurable on the regular expression.
         
     | 
| 
      
 330 
     | 
    
         
            +
             */
         
     | 
| 
       291 
331 
     | 
    
         
             
            typedef struct {
         
     | 
| 
      
 332 
     | 
    
         
            +
                /** The current state of each option. */
         
     | 
| 
       292 
333 
     | 
    
         
             
                uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
         
     | 
| 
       293 
334 
     | 
    
         
             
            } pm_regexp_options_t;
         
     | 
| 
       294 
335 
     | 
    
         | 
| 
       295 
     | 
    
         
            -
             
     | 
| 
      
 336 
     | 
    
         
            +
            /**
         
     | 
| 
      
 337 
     | 
    
         
            +
             * Initialize a new set of options to their default values.
         
     | 
| 
      
 338 
     | 
    
         
            +
             */
         
     | 
| 
       296 
339 
     | 
    
         
             
            static void
         
     | 
| 
       297 
340 
     | 
    
         
             
            pm_regexp_options_init(pm_regexp_options_t *options) {
         
     | 
| 
       298 
341 
     | 
    
         
             
                memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
         
     | 
| 
         @@ -304,8 +347,10 @@ pm_regexp_options_init(pm_regexp_options_t *options) { 
     | 
|
| 
       304 
347 
     | 
    
         
             
                options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
         
     | 
| 
       305 
348 
     | 
    
         
             
            }
         
     | 
| 
       306 
349 
     | 
    
         | 
| 
       307 
     | 
    
         
            -
             
     | 
| 
       308 
     | 
    
         
            -
             
     | 
| 
      
 350 
     | 
    
         
            +
            /**
         
     | 
| 
      
 351 
     | 
    
         
            +
             * Attempt to add the given option to the set of options. Returns true if it was
         
     | 
| 
      
 352 
     | 
    
         
            +
             * added, false if it was already present.
         
     | 
| 
      
 353 
     | 
    
         
            +
             */
         
     | 
| 
       309 
354 
     | 
    
         
             
            static bool
         
     | 
| 
       310 
355 
     | 
    
         
             
            pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
         
     | 
| 
       311 
356 
     | 
    
         
             
                if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
         
     | 
| 
         @@ -327,8 +372,10 @@ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) { 
     | 
|
| 
       327 
372 
     | 
    
         
             
                return false;
         
     | 
| 
       328 
373 
     | 
    
         
             
            }
         
     | 
| 
       329 
374 
     | 
    
         | 
| 
       330 
     | 
    
         
            -
             
     | 
| 
       331 
     | 
    
         
            -
             
     | 
| 
      
 375 
     | 
    
         
            +
            /**
         
     | 
| 
      
 376 
     | 
    
         
            +
             * Attempt to remove the given option from the set of options. Returns true if
         
     | 
| 
      
 377 
     | 
    
         
            +
             * it was removed, false if it was already absent.
         
     | 
| 
      
 378 
     | 
    
         
            +
             */
         
     | 
| 
       332 
379 
     | 
    
         
             
            static bool
         
     | 
| 
       333 
380 
     | 
    
         
             
            pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
         
     | 
| 
       334 
381 
     | 
    
         
             
                if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
         
     | 
| 
         @@ -349,26 +396,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) { 
     | 
|
| 
       349 
396 
     | 
    
         
             
                return false;
         
     | 
| 
       350 
397 
     | 
    
         
             
            }
         
     | 
| 
       351 
398 
     | 
    
         | 
| 
       352 
     | 
    
         
            -
             
     | 
| 
       353 
     | 
    
         
            -
             
     | 
| 
       354 
     | 
    
         
            -
             
     | 
| 
       355 
     | 
    
         
            -
             
     | 
| 
       356 
     | 
    
         
            -
             
     | 
| 
       357 
     | 
    
         
            -
             
     | 
| 
       358 
     | 
    
         
            -
             
     | 
| 
       359 
     | 
    
         
            -
             
     | 
| 
       360 
     | 
    
         
            -
             
     | 
| 
       361 
     | 
    
         
            -
             
     | 
| 
       362 
     | 
    
         
            -
             
     | 
| 
       363 
     | 
    
         
            -
             
     | 
| 
       364 
     | 
    
         
            -
             
     | 
| 
       365 
     | 
    
         
            -
             
     | 
| 
       366 
     | 
    
         
            -
             
     | 
| 
       367 
     | 
    
         
            -
             
     | 
| 
       368 
     | 
    
         
            -
             
     | 
| 
       369 
     | 
    
         
            -
             
     | 
| 
       370 
     | 
    
         
            -
             
     | 
| 
       371 
     | 
    
         
            -
             
     | 
| 
      
 399 
     | 
    
         
            +
            /**
         
     | 
| 
      
 400 
     | 
    
         
            +
             * Groups can have quite a few different patterns for syntax. They basically
         
     | 
| 
      
 401 
     | 
    
         
            +
             * just wrap a set of expressions, but they can potentially have options after a
         
     | 
| 
      
 402 
     | 
    
         
            +
             * question mark. If there _isn't_ a question mark, then it's just a set of
         
     | 
| 
      
 403 
     | 
    
         
            +
             * expressions. If there _is_, then here are the options:
         
     | 
| 
      
 404 
     | 
    
         
            +
             *
         
     | 
| 
      
 405 
     | 
    
         
            +
             * * (?#...)                       - inline comments
         
     | 
| 
      
 406 
     | 
    
         
            +
             * * (?:subexp)                    - non-capturing group
         
     | 
| 
      
 407 
     | 
    
         
            +
             * * (?=subexp)                    - positive lookahead
         
     | 
| 
      
 408 
     | 
    
         
            +
             * * (?!subexp)                    - negative lookahead
         
     | 
| 
      
 409 
     | 
    
         
            +
             * * (?>subexp)                    - atomic group
         
     | 
| 
      
 410 
     | 
    
         
            +
             * * (?~subexp)                    - absence operator
         
     | 
| 
      
 411 
     | 
    
         
            +
             * * (?<=subexp)                   - positive lookbehind
         
     | 
| 
      
 412 
     | 
    
         
            +
             * * (?<!subexp)                   - negative lookbehind
         
     | 
| 
      
 413 
     | 
    
         
            +
             * * (?<name>subexp)               - named capturing group
         
     | 
| 
      
 414 
     | 
    
         
            +
             * * (?'name'subexp)               - named capturing group
         
     | 
| 
      
 415 
     | 
    
         
            +
             * * (?(cond)yes-subexp)           - conditional expression
         
     | 
| 
      
 416 
     | 
    
         
            +
             * * (?(cond)yes-subexp|no-subexp) - conditional expression
         
     | 
| 
      
 417 
     | 
    
         
            +
             * * (?imxdau-imx)                 - turn on and off configuration
         
     | 
| 
      
 418 
     | 
    
         
            +
             * * (?imxdau-imx:subexp)          - turn on and off configuration for an expression
         
     | 
| 
      
 419 
     | 
    
         
            +
             */
         
     | 
| 
       372 
420 
     | 
    
         
             
            static bool
         
     | 
| 
       373 
421 
     | 
    
         
             
            pm_regexp_parse_group(pm_regexp_parser_t *parser) {
         
     | 
| 
       374 
422 
     | 
    
         
             
                // First, parse any options for the group.
         
     | 
| 
         @@ -503,16 +551,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) { 
     | 
|
| 
       503 
551 
     | 
    
         
             
                return pm_regexp_char_expect(parser, ')');
         
     | 
| 
       504 
552 
     | 
    
         
             
            }
         
     | 
| 
       505 
553 
     | 
    
         | 
| 
       506 
     | 
    
         
            -
             
     | 
| 
       507 
     | 
    
         
            -
             
     | 
| 
       508 
     | 
    
         
            -
             
     | 
| 
       509 
     | 
    
         
            -
             
     | 
| 
       510 
     | 
    
         
            -
             
     | 
| 
       511 
     | 
    
         
            -
             
     | 
| 
       512 
     | 
    
         
            -
             
     | 
| 
       513 
     | 
    
         
            -
             
     | 
| 
       514 
     | 
    
         
            -
             
     | 
| 
       515 
     | 
    
         
            -
             
     | 
| 
      
 554 
     | 
    
         
            +
            /**
         
     | 
| 
      
 555 
     | 
    
         
            +
             * item : anchor
         
     | 
| 
      
 556 
     | 
    
         
            +
             *      | match-posix-class
         
     | 
| 
      
 557 
     | 
    
         
            +
             *      | match-char-set
         
     | 
| 
      
 558 
     | 
    
         
            +
             *      | match-char-class
         
     | 
| 
      
 559 
     | 
    
         
            +
             *      | match-char-prop
         
     | 
| 
      
 560 
     | 
    
         
            +
             *      | match-char
         
     | 
| 
      
 561 
     | 
    
         
            +
             *      | match-any
         
     | 
| 
      
 562 
     | 
    
         
            +
             *      | group
         
     | 
| 
      
 563 
     | 
    
         
            +
             *      | quantified
         
     | 
| 
      
 564 
     | 
    
         
            +
             *      ;
         
     | 
| 
      
 565 
     | 
    
         
            +
             */
         
     | 
| 
       516 
566 
     | 
    
         
             
            static bool
         
     | 
| 
       517 
567 
     | 
    
         
             
            pm_regexp_parse_item(pm_regexp_parser_t *parser) {
         
     | 
| 
       518 
568 
     | 
    
         
             
                switch (*parser->cursor++) {
         
     | 
| 
         @@ -533,8 +583,10 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) { 
     | 
|
| 
       533 
583 
     | 
    
         
             
                }
         
     | 
| 
       534 
584 
     | 
    
         
             
            }
         
     | 
| 
       535 
585 
     | 
    
         | 
| 
       536 
     | 
    
         
            -
             
     | 
| 
       537 
     | 
    
         
            -
             
     | 
| 
      
 586 
     | 
    
         
            +
            /**
         
     | 
| 
      
 587 
     | 
    
         
            +
             * expression : item+
         
     | 
| 
      
 588 
     | 
    
         
            +
             *            ;
         
     | 
| 
      
 589 
     | 
    
         
            +
             */
         
     | 
| 
       538 
590 
     | 
    
         
             
            static bool
         
     | 
| 
       539 
591 
     | 
    
         
             
            pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
         
     | 
| 
       540 
592 
     | 
    
         
             
                if (!pm_regexp_parse_item(parser)) {
         
     | 
| 
         @@ -550,10 +602,12 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) { 
     | 
|
| 
       550 
602 
     | 
    
         
             
                return true;
         
     | 
| 
       551 
603 
     | 
    
         
             
            }
         
     | 
| 
       552 
604 
     | 
    
         | 
| 
       553 
     | 
    
         
            -
             
     | 
| 
       554 
     | 
    
         
            -
             
     | 
| 
       555 
     | 
    
         
            -
             
     | 
| 
       556 
     | 
    
         
            -
             
     | 
| 
      
 605 
     | 
    
         
            +
            /**
         
     | 
| 
      
 606 
     | 
    
         
            +
             * pattern : EOF
         
     | 
| 
      
 607 
     | 
    
         
            +
             *         | expression EOF
         
     | 
| 
      
 608 
     | 
    
         
            +
             *         | expression '|' pattern
         
     | 
| 
      
 609 
     | 
    
         
            +
             *         ;
         
     | 
| 
      
 610 
     | 
    
         
            +
             */
         
     | 
| 
       557 
611 
     | 
    
         
             
            static bool
         
     | 
| 
       558 
612 
     | 
    
         
             
            pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
         
     | 
| 
       559 
613 
     | 
    
         
             
                return (
         
     | 
| 
         @@ -572,8 +626,10 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) { 
     | 
|
| 
       572 
626 
     | 
    
         
             
                );
         
     | 
| 
       573 
627 
     | 
    
         
             
            }
         
     | 
| 
       574 
628 
     | 
    
         | 
| 
       575 
     | 
    
         
            -
             
     | 
| 
       576 
     | 
    
         
            -
             
     | 
| 
      
 629 
     | 
    
         
            +
            /**
         
     | 
| 
      
 630 
     | 
    
         
            +
             * Parse a regular expression and extract the names of all of the named capture
         
     | 
| 
      
 631 
     | 
    
         
            +
             * groups.
         
     | 
| 
      
 632 
     | 
    
         
            +
             */
         
     | 
| 
       577 
633 
     | 
    
         
             
            PRISM_EXPORTED_FUNCTION bool
         
     | 
| 
       578 
634 
     | 
    
         
             
            pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
         
     | 
| 
       579 
635 
     | 
    
         
             
                pm_regexp_parser_t parser;
         
     | 
    
        data/src/serialize.c
    CHANGED
    
    | 
         @@ -54,7 +54,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe 
     | 
|
| 
       54 
54 
     | 
    
         
             
                }
         
     | 
| 
       55 
55 
     | 
    
         
             
            }
         
     | 
| 
       56 
56 
     | 
    
         | 
| 
       57 
     | 
    
         
            -
            void
         
     | 
| 
      
 57 
     | 
    
         
            +
            static void
         
     | 
| 
       58 
58 
     | 
    
         
             
            pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
         
     | 
| 
       59 
59 
     | 
    
         
             
                pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
         
     | 
| 
       60 
60 
     | 
    
         | 
| 
         @@ -1131,16 +1131,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { 
     | 
|
| 
       1131 
1131 
     | 
    
         
             
                        }
         
     | 
| 
       1132 
1132 
     | 
    
         
             
                        break;
         
     | 
| 
       1133 
1133 
     | 
    
         
             
                    }
         
     | 
| 
       1134 
     | 
    
         
            -
                    case PM_KEYWORD_PARAMETER_NODE: {
         
     | 
| 
       1135 
     | 
    
         
            -
                        pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_parameter_node_t *)node)->name));
         
     | 
| 
       1136 
     | 
    
         
            -
                        pm_serialize_location(parser, &((pm_keyword_parameter_node_t *)node)->name_loc, buffer);
         
     | 
| 
       1137 
     | 
    
         
            -
                        if (((pm_keyword_parameter_node_t *)node)->value == NULL) {
         
     | 
| 
       1138 
     | 
    
         
            -
                            pm_buffer_append_byte(buffer, 0);
         
     | 
| 
       1139 
     | 
    
         
            -
                        } else {
         
     | 
| 
       1140 
     | 
    
         
            -
                            pm_serialize_node(parser, (pm_node_t *)((pm_keyword_parameter_node_t *)node)->value, buffer);
         
     | 
| 
       1141 
     | 
    
         
            -
                        }
         
     | 
| 
       1142 
     | 
    
         
            -
                        break;
         
     | 
| 
       1143 
     | 
    
         
            -
                    }
         
     | 
| 
       1144 
1134 
     | 
    
         
             
                    case PM_KEYWORD_REST_PARAMETER_NODE: {
         
     | 
| 
       1145 
1135 
     | 
    
         
             
                        pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
         
     | 
| 
       1146 
1136 
     | 
    
         
             
                        if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
         
     | 
| 
         @@ -1348,6 +1338,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { 
     | 
|
| 
       1348 
1338 
     | 
    
         
             
                        pm_buffer_append_varint(buffer, ((pm_numbered_reference_read_node_t *)node)->number);
         
     | 
| 
       1349 
1339 
     | 
    
         
             
                        break;
         
     | 
| 
       1350 
1340 
     | 
    
         
             
                    }
         
     | 
| 
      
 1341 
     | 
    
         
            +
                    case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
         
     | 
| 
      
 1342 
     | 
    
         
            +
                        pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
         
     | 
| 
      
 1343 
     | 
    
         
            +
                        pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
         
     | 
| 
      
 1344 
     | 
    
         
            +
                        pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
         
     | 
| 
      
 1345 
     | 
    
         
            +
                        break;
         
     | 
| 
      
 1346 
     | 
    
         
            +
                    }
         
     | 
| 
       1351 
1347 
     | 
    
         
             
                    case PM_OPTIONAL_PARAMETER_NODE: {
         
     | 
| 
       1352 
1348 
     | 
    
         
             
                        pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
         
     | 
| 
       1353 
1349 
     | 
    
         
             
                        pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
         
     | 
| 
         @@ -1482,6 +1478,11 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { 
     | 
|
| 
       1482 
1478 
     | 
    
         
             
                        pm_buffer_append_varint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
         
     | 
| 
       1483 
1479 
     | 
    
         
             
                        break;
         
     | 
| 
       1484 
1480 
     | 
    
         
             
                    }
         
     | 
| 
      
 1481 
     | 
    
         
            +
                    case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
         
     | 
| 
      
 1482 
     | 
    
         
            +
                        pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
         
     | 
| 
      
 1483 
     | 
    
         
            +
                        pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
         
     | 
| 
      
 1484 
     | 
    
         
            +
                        break;
         
     | 
| 
      
 1485 
     | 
    
         
            +
                    }
         
     | 
| 
       1485 
1486 
     | 
    
         
             
                    case PM_REQUIRED_PARAMETER_NODE: {
         
     | 
| 
       1486 
1487 
     | 
    
         
             
                        pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
         
     | 
| 
       1487 
1488 
     | 
    
         
             
                        break;
         
     | 
| 
         @@ -1785,6 +1786,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu 
     | 
|
| 
       1785 
1786 
     | 
    
         
             
                pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
         
     | 
| 
       1786 
1787 
     | 
    
         
             
            }
         
     | 
| 
       1787 
1788 
     | 
    
         | 
| 
      
 1789 
     | 
    
         
            +
            /**
         
     | 
| 
      
 1790 
     | 
    
         
            +
             * Serialize the given list of comments to the given buffer.
         
     | 
| 
      
 1791 
     | 
    
         
            +
             */
         
     | 
| 
       1788 
1792 
     | 
    
         
             
            void
         
     | 
| 
       1789 
1793 
     | 
    
         
             
            pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
         
     | 
| 
       1790 
1794 
     | 
    
         
             
                pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));
         
     | 
| 
         @@ -1838,6 +1842,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t * 
     | 
|
| 
       1838 
1842 
     | 
    
         
             
                }
         
     | 
| 
       1839 
1843 
     | 
    
         
             
            }
         
     | 
| 
       1840 
1844 
     | 
    
         | 
| 
      
 1845 
     | 
    
         
            +
            /**
         
     | 
| 
      
 1846 
     | 
    
         
            +
             * Serialize the name of the encoding to the buffer.
         
     | 
| 
      
 1847 
     | 
    
         
            +
             */
         
     | 
| 
       1841 
1848 
     | 
    
         
             
            void
         
     | 
| 
       1842 
1849 
     | 
    
         
             
            pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
         
     | 
| 
       1843 
1850 
     | 
    
         
             
                size_t encoding_length = strlen(encoding->name);
         
     | 
| 
         @@ -1845,10 +1852,14 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) { 
     | 
|
| 
       1845 
1852 
     | 
    
         
             
                pm_buffer_append_string(buffer, encoding->name, encoding_length);
         
     | 
| 
       1846 
1853 
     | 
    
         
             
            }
         
     | 
| 
       1847 
1854 
     | 
    
         | 
| 
       1848 
     | 
    
         
            -
            #line  
     | 
| 
      
 1855 
     | 
    
         
            +
            #line 206 "serialize.c.erb"
         
     | 
| 
      
 1856 
     | 
    
         
            +
            /**
         
     | 
| 
      
 1857 
     | 
    
         
            +
             * Serialize the encoding, metadata, nodes, and constant pool.
         
     | 
| 
      
 1858 
     | 
    
         
            +
             */
         
     | 
| 
       1849 
1859 
     | 
    
         
             
            void
         
     | 
| 
       1850 
1860 
     | 
    
         
             
            pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
         
     | 
| 
       1851 
1861 
     | 
    
         
             
                pm_serialize_encoding(&parser->encoding, buffer);
         
     | 
| 
      
 1862 
     | 
    
         
            +
                pm_buffer_append_varint(buffer, parser->start_line);
         
     | 
| 
       1852 
1863 
     | 
    
         
             
                pm_serialize_comment_list(parser, &parser->comment_list, buffer);
         
     | 
| 
       1853 
1864 
     | 
    
         
             
                pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
         
     | 
| 
       1854 
1865 
     | 
    
         
             
                pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
         
     | 
| 
         @@ -1921,10 +1932,16 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { 
     | 
|
| 
       1921 
1932 
     | 
    
         
             
                pm_buffer_append_varint(buffer, parser->lex_state);
         
     | 
| 
       1922 
1933 
     | 
    
         
             
            }
         
     | 
| 
       1923 
1934 
     | 
    
         | 
| 
      
 1935 
     | 
    
         
            +
            /**
         
     | 
| 
      
 1936 
     | 
    
         
            +
             * Lex the given source and serialize to the given buffer.
         
     | 
| 
      
 1937 
     | 
    
         
            +
             */
         
     | 
| 
       1924 
1938 
     | 
    
         
             
            PRISM_EXPORTED_FUNCTION void
         
     | 
| 
       1925 
     | 
    
         
            -
             
     | 
| 
      
 1939 
     | 
    
         
            +
            pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
         
     | 
| 
      
 1940 
     | 
    
         
            +
                pm_options_t options = { 0 };
         
     | 
| 
      
 1941 
     | 
    
         
            +
                if (data != NULL) pm_options_read(&options, data);
         
     | 
| 
      
 1942 
     | 
    
         
            +
             
     | 
| 
       1926 
1943 
     | 
    
         
             
                pm_parser_t parser;
         
     | 
| 
       1927 
     | 
    
         
            -
                pm_parser_init(&parser, source, size,  
     | 
| 
      
 1944 
     | 
    
         
            +
                pm_parser_init(&parser, source, size, &options);
         
     | 
| 
       1928 
1945 
     | 
    
         | 
| 
       1929 
1946 
     | 
    
         
             
                pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
         
     | 
| 
       1930 
1947 
     | 
    
         
             
                    .data = (void *) buffer,
         
     | 
| 
         @@ -1934,10 +1951,11 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu 
     | 
|
| 
       1934 
1951 
     | 
    
         
             
                parser.lex_callback = &lex_callback;
         
     | 
| 
       1935 
1952 
     | 
    
         
             
                pm_node_t *node = pm_parse(&parser);
         
     | 
| 
       1936 
1953 
     | 
    
         | 
| 
       1937 
     | 
    
         
            -
                // Append 0 to mark end of tokens
         
     | 
| 
      
 1954 
     | 
    
         
            +
                // Append 0 to mark end of tokens.
         
     | 
| 
       1938 
1955 
     | 
    
         
             
                pm_buffer_append_byte(buffer, 0);
         
     | 
| 
       1939 
1956 
     | 
    
         | 
| 
       1940 
1957 
     | 
    
         
             
                pm_serialize_encoding(&parser.encoding, buffer);
         
     | 
| 
      
 1958 
     | 
    
         
            +
                pm_buffer_append_varint(buffer, parser.start_line);
         
     | 
| 
       1941 
1959 
     | 
    
         
             
                pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
         
     | 
| 
       1942 
1960 
     | 
    
         
             
                pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
         
     | 
| 
       1943 
1961 
     | 
    
         
             
                pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
         
     | 
| 
         @@ -1945,15 +1963,20 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu 
     | 
|
| 
       1945 
1963 
     | 
    
         | 
| 
       1946 
1964 
     | 
    
         
             
                pm_node_destroy(&parser, node);
         
     | 
| 
       1947 
1965 
     | 
    
         
             
                pm_parser_free(&parser);
         
     | 
| 
      
 1966 
     | 
    
         
            +
                pm_options_free(&options);
         
     | 
| 
       1948 
1967 
     | 
    
         
             
            }
         
     | 
| 
       1949 
1968 
     | 
    
         | 
| 
       1950 
     | 
    
         
            -
             
     | 
| 
       1951 
     | 
    
         
            -
             
     | 
| 
      
 1969 
     | 
    
         
            +
            /**
         
     | 
| 
      
 1970 
     | 
    
         
            +
             * Parse and serialize both the AST and the tokens represented by the given
         
     | 
| 
      
 1971 
     | 
    
         
            +
             * source to the given buffer.
         
     | 
| 
      
 1972 
     | 
    
         
            +
             */
         
     | 
| 
       1952 
1973 
     | 
    
         
             
            PRISM_EXPORTED_FUNCTION void
         
     | 
| 
       1953 
     | 
    
         
            -
             
     | 
| 
      
 1974 
     | 
    
         
            +
            pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
         
     | 
| 
      
 1975 
     | 
    
         
            +
                pm_options_t options = { 0 };
         
     | 
| 
      
 1976 
     | 
    
         
            +
                if (data != NULL) pm_options_read(&options, data);
         
     | 
| 
      
 1977 
     | 
    
         
            +
             
     | 
| 
       1954 
1978 
     | 
    
         
             
                pm_parser_t parser;
         
     | 
| 
       1955 
     | 
    
         
            -
                pm_parser_init(&parser, source, size,  
     | 
| 
       1956 
     | 
    
         
            -
                if (metadata) pm_parser_metadata(&parser, metadata);
         
     | 
| 
      
 1979 
     | 
    
         
            +
                pm_parser_init(&parser, source, size, &options);
         
     | 
| 
       1957 
1980 
     | 
    
         | 
| 
       1958 
1981 
     | 
    
         
             
                pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
         
     | 
| 
       1959 
1982 
     | 
    
         
             
                    .data = (void *) buffer,
         
     | 
| 
         @@ -1968,4 +1991,5 @@ pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, 
     | 
|
| 
       1968 
1991 
     | 
    
         | 
| 
       1969 
1992 
     | 
    
         
             
                pm_node_destroy(&parser, node);
         
     | 
| 
       1970 
1993 
     | 
    
         
             
                pm_parser_free(&parser);
         
     | 
| 
      
 1994 
     | 
    
         
            +
                pm_options_free(&options);
         
     | 
| 
       1971 
1995 
     | 
    
         
             
            }
         
     | 
    
        data/src/token_type.c
    CHANGED
    
    | 
         @@ -9,7 +9,9 @@ 
     | 
|
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
            #include "prism/ast.h"
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
      
 12 
     | 
    
         
            +
            /**
         
     | 
| 
      
 13 
     | 
    
         
            +
             * Returns a string representation of the given token type.
         
     | 
| 
      
 14 
     | 
    
         
            +
             */
         
     | 
| 
       13 
15 
     | 
    
         
             
            PRISM_EXPORTED_FUNCTION const char *
         
     | 
| 
       14 
16 
     | 
    
         
             
            pm_token_type_to_str(pm_token_type_t token_type)
         
     | 
| 
       15 
17 
     | 
    
         
             
            {
         
     |