RubyGems - prism - Versions diffs - 0.15.1 → 0.17.0 - Mend

prism 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91)

checksums.yaml +4 -4
data/CHANGELOG.md +35 -1
data/Makefile +12 -0
data/README.md +3 -1
data/config.yml +66 -50
data/docs/configuration.md +2 -0
data/docs/fuzzing.md +1 -1
data/docs/javascript.md +90 -0
data/docs/releasing.md +27 -0
data/docs/ruby_api.md +2 -0
data/docs/serialization.md +28 -29
data/ext/prism/api_node.c +856 -826
data/ext/prism/api_pack.c +20 -9
data/ext/prism/extension.c +494 -119
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +3157 -747
data/include/prism/defines.h +40 -8
data/include/prism/diagnostic.h +36 -3
data/include/prism/enc/pm_encoding.h +119 -28
data/include/prism/node.h +38 -30
data/include/prism/options.h +204 -0
data/include/prism/pack.h +44 -33
data/include/prism/parser.h +445 -199
data/include/prism/prettyprint.h +26 -0
data/include/prism/regexp.h +16 -2
data/include/prism/util/pm_buffer.h +102 -18
data/include/prism/util/pm_char.h +162 -48
data/include/prism/util/pm_constant_pool.h +128 -34
data/include/prism/util/pm_list.h +68 -38
data/include/prism/util/pm_memchr.h +18 -3
data/include/prism/util/pm_newline_list.h +71 -28
data/include/prism/util/pm_state_stack.h +25 -7
data/include/prism/util/pm_string.h +115 -27
data/include/prism/util/pm_string_list.h +25 -6
data/include/prism/util/pm_strncasecmp.h +32 -0
data/include/prism/util/pm_strpbrk.h +31 -17
data/include/prism/version.h +28 -3
data/include/prism.h +229 -36
data/lib/prism/compiler.rb +5 -5
data/lib/prism/debug.rb +43 -13
data/lib/prism/desugar_compiler.rb +1 -1
data/lib/prism/dispatcher.rb +27 -26
data/lib/prism/dsl.rb +16 -16
data/lib/prism/ffi.rb +138 -61
data/lib/prism/lex_compat.rb +26 -16
data/lib/prism/mutation_compiler.rb +11 -11
data/lib/prism/node.rb +426 -227
data/lib/prism/node_ext.rb +23 -16
data/lib/prism/node_inspector.rb +1 -1
data/lib/prism/pack.rb +79 -40
data/lib/prism/parse_result/comments.rb +7 -2
data/lib/prism/parse_result/newlines.rb +4 -0
data/lib/prism/parse_result.rb +157 -21
data/lib/prism/pattern.rb +14 -3
data/lib/prism/ripper_compat.rb +28 -10
data/lib/prism/serialize.rb +935 -307
data/lib/prism/visitor.rb +9 -5
data/lib/prism.rb +20 -2
data/prism.gemspec +11 -2
data/rbi/prism.rbi +7305 -0
data/rbi/prism_static.rbi +196 -0
data/sig/prism.rbs +4468 -0
data/sig/prism_static.rbs +123 -0
data/src/diagnostic.c +56 -53
data/src/enc/pm_big5.c +1 -0
data/src/enc/pm_euc_jp.c +1 -0
data/src/enc/pm_gbk.c +1 -0
data/src/enc/pm_shift_jis.c +1 -0
data/src/enc/pm_tables.c +316 -80
data/src/enc/pm_unicode.c +54 -9
data/src/enc/pm_windows_31j.c +1 -0
data/src/node.c +357 -345
data/src/options.c +170 -0
data/src/prettyprint.c +7697 -1643
data/src/prism.c +1964 -1125
data/src/regexp.c +153 -95
data/src/serialize.c +432 -397
data/src/token_type.c +3 -1
data/src/util/pm_buffer.c +88 -23
data/src/util/pm_char.c +103 -57
data/src/util/pm_constant_pool.c +52 -22
data/src/util/pm_list.c +12 -4
data/src/util/pm_memchr.c +5 -3
data/src/util/pm_newline_list.c +25 -63
data/src/util/pm_state_stack.c +9 -3
data/src/util/pm_string.c +95 -85
data/src/util/pm_string_list.c +14 -15
data/src/util/pm_strncasecmp.c +10 -3
data/src/util/pm_strpbrk.c +25 -19
metadata +12 -3
data/docs/prism.png +0 -0

data/src/regexp.c CHANGED

@@ -1,16 +1,31 @@
 #include "prism/regexp.h"
-// This is the parser that is going to handle parsing regular expressions.
+/**
+ * This is the parser that is going to handle parsing regular expressions.
+ */
 typedef struct {
+    /** A pointer to the start of the source that we are parsing. */
     const uint8_t *start;
+    /** A pointer to the current position in the source. */
     const uint8_t *cursor;
+    /** A pointer to the end of the source that we are parsing. */
     const uint8_t *end;
+    /** A list of named captures that we've found. */
     pm_string_list_t *named_captures;
+    /** Whether the encoding has changed from the default. */
     bool encoding_changed;
+    /** The encoding of the source. */
     pm_encoding_t *encoding;
 } pm_regexp_parser_t;
-// This initializes a new parser with the given source.
+/**
+ * This initializes a new parser with the given source.
+ */
 static void
 pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
     *parser = (pm_regexp_parser_t) {
@@ -23,7 +38,9 @@ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const ui
     };
 }
-// This appends a new string to the list of named captures.
+/**
+ * This appends a new string to the list of named captures.
+ */
 static void
 pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
     pm_string_t string;
@@ -32,13 +49,17 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start,
     pm_string_free(&string);
 }
-// Returns true if the next character is the end of the source.
+/**
+ * Returns true if the next character is the end of the source.
+ */
 static inline bool
 pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
     return parser->cursor >= parser->end;
 }
-// Optionally accept a char and consume it if it exists.
+/**
+ * Optionally accept a char and consume it if it exists.
+ */
 static inline bool
 pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
     if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
@@ -48,7 +69,9 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
     return false;
 }
-// Expect a character to be present and consume it.
+/**
+ * Expect a character to be present and consume it.
+ */
 static inline bool
 pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
     if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
@@ -58,7 +81,9 @@ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
     return false;
 }
-// This advances the current token to the next instance of the given character.
+/**
+ * This advances the current token to the next instance of the given character.
+ */
 static bool
 pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
     if (pm_regexp_char_is_eof(parser)) {
@@ -74,37 +99,39 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
     return true;
 }
-// Range quantifiers are a special class of quantifiers that look like
-//
-// * {digit}
-// * {digit,}
-// * {digit,digit}
-// * {,digit}
-//
-// Unfortunately, if there are any spaces in between, then this just becomes a
-// regular character match expression and we have to backtrack. So when this
-// function first starts running, we'll create a "save" point and then attempt
-// to parse the quantifier. If it fails, we'll restore the save point and
-// return.
-//
-// The properly track everything, we're going to build a little state machine.
-// It looks something like the following:
-//
-//                  ┌───────┐                 ┌─────────┐ ────────────┐
-// ──── lbrace ───> │ start │ ──── digit ───> │ minimum │             │
-//                  └───────┘                 └─────────┘ <─── digit ─┘
-//                      │                       │    │
-//   ┌───────┐          │                       │  rbrace
-//   │ comma │ <───── comma  ┌──── comma ───────┘    │
-//   └───────┘               V                       V
-//      │             ┌─────────┐               ┌─────────┐
-//      └── digit ──> │ maximum │ ── rbrace ──> │| final |│
-//                    └─────────┘               └─────────┘
-//                    │         ^
-//                    └─ digit ─┘
-//
-// Note that by the time we've hit this function, the lbrace has already been
-// consumed so we're in the start state.
+/**
+ * Range quantifiers are a special class of quantifiers that look like
+ *
+ * * {digit}
+ * * {digit,}
+ * * {digit,digit}
+ * * {,digit}
+ *
+ * Unfortunately, if there are any spaces in between, then this just becomes a
+ * regular character match expression and we have to backtrack. So when this
+ * function first starts running, we'll create a "save" point and then attempt
+ * to parse the quantifier. If it fails, we'll restore the save point and
+ * return.
+ *
+ * To properly track everything, we're going to build a little state machine.
+ * It looks something like the following:
+ *
+ *                  ┌───────┐                 ┌─────────┐ ────────────┐
+ * ──── lbrace ───> │ start │ ──── digit ───> │ minimum │             │
+ *                  └───────┘                 └─────────┘ <─── digit ─┘
+ *                      │                       │    │
+ *   ┌───────┐          │                       │  rbrace
+ *   │ comma │ <───── comma  ┌──── comma ───────┘    │
+ *   └───────┘               V                       V
+ *      │             ┌─────────┐               ┌─────────┐
+ *      └── digit ──> │ maximum │ ── rbrace ──> │| final |│
+ *                    └─────────┘               └─────────┘
+ *                    │         ^
+ *                    └─ digit ─┘
+ *
+ * Note that by the time we've hit this function, the lbrace has already been
+ * consumed so we're in the start state.
+ */
 static bool
 pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
     const uint8_t *savepoint = parser->cursor;
@@ -180,14 +207,18 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
     return true;
 }
-// quantifier : star-quantifier
-//            | plus-quantifier
-//            | optional-quantifier
-//            | range-quantifier
-//            | <empty>
-//            ;
+/**
+ * quantifier : star-quantifier
+ *            | plus-quantifier
+ *            | optional-quantifier
+ *            | range-quantifier
+ *            | <empty>
+ *            ;
+ */
 static bool
 pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
+    if (pm_regexp_char_is_eof(parser)) return true;
     switch (*parser->cursor) {
         case '*':
         case '+':
@@ -203,8 +234,10 @@ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
     }
 }
-// match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
-//                   ;
+/**
+ * match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
+ *                   ;
+ */
 static bool
 pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
     if (!pm_regexp_char_expect(parser, ':')) {
@@ -224,8 +257,10 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
 static bool
 pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
-// match-char-set : '[' '^'? (match-range | match-char)* ']'
-//                ;
+/**
+ * match-char-set : '[' '^'? (match-range | match-char)* ']'
+ *                ;
+ */
 static bool
 pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
     pm_regexp_char_accept(parser, '^');
@@ -249,7 +284,9 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
     return pm_regexp_char_expect(parser, ']');
 }
-// A left bracket can either mean a POSIX class or a character set.
+/**
+ * A left bracket can either mean a POSIX class or a character set.
+ */
 static bool
 pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
     const uint8_t *reset = parser->cursor;
@@ -269,8 +306,10 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
 static bool
 pm_regexp_parse_expression(pm_regexp_parser_t *parser);
-// These are the states of the options that are configurable on the regular
-// expression (or from within a group).
+/**
+ * These are the states of the options that are configurable on the regular
+ * expression (or from within a group).
+ */
 typedef enum {
     PM_REGEXP_OPTION_STATE_INVALID,
     PM_REGEXP_OPTION_STATE_TOGGLEABLE,
@@ -281,16 +320,22 @@ typedef enum {
 // These are the options that are configurable on the regular expression (or
 // from within a group).
 #define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
 #define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
 #define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
-// This is the set of options that are configurable on the regular expression.
+/**
+ * This is the set of options that are configurable on the regular expression.
+ */
 typedef struct {
+    /** The current state of each option. */
     uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
 } pm_regexp_options_t;
-// Initialize a new set of options to their default values.
+/**
+ * Initialize a new set of options to their default values.
+ */
 static void
 pm_regexp_options_init(pm_regexp_options_t *options) {
     memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
@@ -302,8 +347,10 @@ pm_regexp_options_init(pm_regexp_options_t *options) {
     options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
 }
-// Attempt to add the given option to the set of options. Returns true if it was
-// added, false if it was already present.
+/**
+ * Attempt to add the given option to the set of options. Returns true if it was
+ * added, false if it was already present.
+ */
 static bool
 pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
     if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
@@ -325,8 +372,10 @@ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
     return false;
 }
-// Attempt to remove the given option from the set of options. Returns true if
-// it was removed, false if it was already absent.
+/**
+ * Attempt to remove the given option from the set of options. Returns true if
+ * it was removed, false if it was already absent.
+ */
 static bool
 pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
     if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
@@ -347,26 +396,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
     return false;
 }
-// Groups can have quite a few different patterns for syntax. They basically
-// just wrap a set of expressions, but they can potentially have options after a
-// question mark. If there _isn't_ a question mark, then it's just a set of
-// expressions. If there _is_, then here are the options:
-//
-// * (?#...)                       - inline comments
-// * (?:subexp)                    - non-capturing group
-// * (?=subexp)                    - positive lookahead
-// * (?!subexp)                    - negative lookahead
-// * (?>subexp)                    - atomic group
-// * (?~subexp)                    - absence operator
-// * (?<=subexp)                   - positive lookbehind
-// * (?<!subexp)                   - negative lookbehind
-// * (?<name>subexp)               - named capturing group
-// * (?'name'subexp)               - named capturing group
-// * (?(cond)yes-subexp)           - conditional expression
-// * (?(cond)yes-subexp|no-subexp) - conditional expression
-// * (?imxdau-imx)                 - turn on and off configuration
-// * (?imxdau-imx:subexp)          - turn on and off configuration for an expression
-//
+/**
+ * Groups can have quite a few different patterns for syntax. They basically
+ * just wrap a set of expressions, but they can potentially have options after a
+ * question mark. If there _isn't_ a question mark, then it's just a set of
+ * expressions. If there _is_, then here are the options:
+ *
+ * * (?#...)                       - inline comments
+ * * (?:subexp)                    - non-capturing group
+ * * (?=subexp)                    - positive lookahead
+ * * (?!subexp)                    - negative lookahead
+ * * (?>subexp)                    - atomic group
+ * * (?~subexp)                    - absence operator
+ * * (?<=subexp)                   - positive lookbehind
+ * * (?<!subexp)                   - negative lookbehind
+ * * (?<name>subexp)               - named capturing group
+ * * (?'name'subexp)               - named capturing group
+ * * (?(cond)yes-subexp)           - conditional expression
+ * * (?(cond)yes-subexp|no-subexp) - conditional expression
+ * * (?imxdau-imx)                 - turn on and off configuration
+ * * (?imxdau-imx:subexp)          - turn on and off configuration for an expression
+ */
 static bool
 pm_regexp_parse_group(pm_regexp_parser_t *parser) {
     // First, parse any options for the group.
@@ -501,16 +551,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
     return pm_regexp_char_expect(parser, ')');
 }
-// item : anchor
-//      | match-posix-class
-//      | match-char-set
-//      | match-char-class
-//      | match-char-prop
-//      | match-char
-//      | match-any
-//      | group
-//      | quantified
-//      ;
+/**
+ * item : anchor
+ *      | match-posix-class
+ *      | match-char-set
+ *      | match-char-class
+ *      | match-char-prop
+ *      | match-char
+ *      | match-any
+ *      | group
+ *      | quantified
+ *      ;
+ */
 static bool
 pm_regexp_parse_item(pm_regexp_parser_t *parser) {
     switch (*parser->cursor++) {
@@ -531,8 +583,10 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
     }
 }
-// expression : item+
-//            ;
+/**
+ * expression : item+
+ *            ;
+ */
 static bool
 pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
     if (!pm_regexp_parse_item(parser)) {
@@ -548,10 +602,12 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
     return true;
 }
-// pattern : EOF
-//         | expression EOF
-//         | expression '|' pattern
-//         ;
+/**
+ * pattern : EOF
+ *         | expression EOF
+ *         | expression '|' pattern
+ *         ;
+ */
 static bool
 pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
     return (
@@ -570,8 +626,10 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
     );
 }
-// Parse a regular expression and extract the names of all of the named capture
-// groups.
+/**
+ * Parse a regular expression and extract the names of all of the named capture
+ * groups.
+ */
 PRISM_EXPORTED_FUNCTION bool
 pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
     pm_regexp_parser_t parser;