RubyGems - yarp - Versions diffs - 0.12.0 → 0.13.0 - Mend

yarp 0.12.0 → 0.13.0

Files changed (115) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +29 -8
data/CONTRIBUTING.md +2 -2
data/Makefile +5 -5
data/README.md +11 -12
data/config.yml +6 -2
data/docs/build_system.md +21 -21
data/docs/building.md +4 -4
data/docs/configuration.md +25 -21
data/docs/design.md +2 -2
data/docs/encoding.md +17 -17
data/docs/fuzzing.md +4 -4
data/docs/heredocs.md +3 -3
data/docs/mapping.md +94 -94
data/docs/ripper.md +4 -4
data/docs/ruby_api.md +11 -11
data/docs/serialization.md +17 -16
data/docs/testing.md +6 -6
data/ext/prism/api_node.c +4725 -0
data/ext/{yarp → prism}/api_pack.c +82 -82
data/ext/{yarp → prism}/extconf.rb +13 -13
data/ext/{yarp → prism}/extension.c +175 -168
data/ext/prism/extension.h +18 -0
data/include/prism/ast.h +1932 -0
data/include/prism/defines.h +45 -0
data/include/prism/diagnostic.h +231 -0
data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
data/include/prism/node.h +41 -0
data/include/prism/pack.h +141 -0
data/include/{yarp → prism}/parser.h +143 -142
data/include/prism/regexp.h +19 -0
data/include/prism/unescape.h +48 -0
data/include/prism/util/pm_buffer.h +51 -0
data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
data/include/prism/util/pm_memchr.h +14 -0
data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
data/include/prism/util/pm_state_stack.h +24 -0
data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
data/include/prism/util/pm_string_list.h +25 -0
data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
data/include/prism/version.h +4 -0
data/include/prism.h +82 -0
data/lib/prism/compiler.rb +465 -0
data/lib/prism/debug.rb +157 -0
data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
data/lib/prism/dispatcher.rb +2051 -0
data/lib/prism/dsl.rb +750 -0
data/lib/{yarp → prism}/ffi.rb +66 -67
data/lib/{yarp → prism}/lex_compat.rb +40 -43
data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
data/lib/{yarp → prism}/node.rb +2012 -2593
data/lib/prism/node_ext.rb +55 -0
data/lib/prism/node_inspector.rb +68 -0
data/lib/{yarp → prism}/pack.rb +1 -1
data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
data/lib/prism/parse_result.rb +266 -0
data/lib/{yarp → prism}/pattern.rb +14 -14
data/lib/{yarp → prism}/ripper_compat.rb +5 -5
data/lib/{yarp → prism}/serialize.rb +12 -7
data/lib/prism/visitor.rb +470 -0
data/lib/prism.rb +64 -0
data/lib/yarp.rb +2 -614
data/src/diagnostic.c +213 -208
data/src/enc/pm_big5.c +52 -0
data/src/enc/pm_euc_jp.c +58 -0
data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
data/src/enc/pm_shift_jis.c +56 -0
data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
data/src/enc/pm_windows_31j.c +56 -0
data/src/node.c +1293 -1233
data/src/pack.c +247 -247
data/src/prettyprint.c +1479 -1479
data/src/{yarp.c → prism.c} +5205 -5083
data/src/regexp.c +132 -132
data/src/serialize.c +1121 -1121
data/src/token_type.c +169 -167
data/src/unescape.c +106 -87
data/src/util/pm_buffer.c +103 -0
data/src/util/{yp_char.c → pm_char.c} +72 -72
data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
data/src/util/{yp_list.c → pm_list.c} +10 -10
data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
data/src/util/{yp_string.c → pm_string.c} +38 -38
data/src/util/pm_string_list.c +29 -0
data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
data/yarp.gemspec +68 -59
metadata +70 -61
data/ext/yarp/api_node.c +0 -4728
data/ext/yarp/extension.h +0 -18
data/include/yarp/ast.h +0 -1929
data/include/yarp/defines.h +0 -45
data/include/yarp/diagnostic.h +0 -226
data/include/yarp/node.h +0 -42
data/include/yarp/pack.h +0 -141
data/include/yarp/regexp.h +0 -19
data/include/yarp/unescape.h +0 -44
data/include/yarp/util/yp_buffer.h +0 -51
data/include/yarp/util/yp_memchr.h +0 -14
data/include/yarp/util/yp_state_stack.h +0 -24
data/include/yarp/util/yp_string_list.h +0 -25
data/include/yarp/version.h +0 -4
data/include/yarp.h +0 -82
data/src/enc/yp_big5.c +0 -52
data/src/enc/yp_euc_jp.c +0 -58
data/src/enc/yp_shift_jis.c +0 -56
data/src/enc/yp_windows_31j.c +0 -56
data/src/util/yp_buffer.c +0 -101
data/src/util/yp_string_list.c +0 -29

data/include/{yarp → prism}/parser.h RENAMED Viewed

@@ -1,13 +1,13 @@
-#ifndef YARP_PARSER_H
-#define YARP_PARSER_H
+#ifndef PRISM_PARSER_H
+#define PRISM_PARSER_H
-#include "yarp/ast.h"
-#include "yarp/defines.h"
-#include "yarp/enc/yp_encoding.h"
-#include "yarp/util/yp_constant_pool.h"
-#include "yarp/util/yp_list.h"
-#include "yarp/util/yp_newline_list.h"
-#include "yarp/util/yp_state_stack.h"
+#include "prism/ast.h"
+#include "prism/defines.h"
+#include "prism/enc/pm_encoding.h"
+#include "prism/util/pm_constant_pool.h"
+#include "prism/util/pm_list.h"
+#include "prism/util/pm_newline_list.h"
+#include "prism/util/pm_state_stack.h"
 #include <stdbool.h>
@@ -15,88 +15,88 @@
 // the lexer can track. This is used to determine which kind of token to return
 // based on the context of the parser.
 typedef enum {
-    YP_LEX_STATE_BIT_BEG,
-    YP_LEX_STATE_BIT_END,
-    YP_LEX_STATE_BIT_ENDARG,
-    YP_LEX_STATE_BIT_ENDFN,
-    YP_LEX_STATE_BIT_ARG,
-    YP_LEX_STATE_BIT_CMDARG,
-    YP_LEX_STATE_BIT_MID,
-    YP_LEX_STATE_BIT_FNAME,
-    YP_LEX_STATE_BIT_DOT,
-    YP_LEX_STATE_BIT_CLASS,
-    YP_LEX_STATE_BIT_LABEL,
-    YP_LEX_STATE_BIT_LABELED,
-    YP_LEX_STATE_BIT_FITEM
-} yp_lex_state_bit_t;
+    PM_LEX_STATE_BIT_BEG,
+    PM_LEX_STATE_BIT_END,
+    PM_LEX_STATE_BIT_ENDARG,
+    PM_LEX_STATE_BIT_ENDFN,
+    PM_LEX_STATE_BIT_ARG,
+    PM_LEX_STATE_BIT_CMDARG,
+    PM_LEX_STATE_BIT_MID,
+    PM_LEX_STATE_BIT_FNAME,
+    PM_LEX_STATE_BIT_DOT,
+    PM_LEX_STATE_BIT_CLASS,
+    PM_LEX_STATE_BIT_LABEL,
+    PM_LEX_STATE_BIT_LABELED,
+    PM_LEX_STATE_BIT_FITEM
+} pm_lex_state_bit_t;
 // This enum combines the various bits from the above enum into individual
 // values that represent the various states of the lexer.
 typedef enum {
-    YP_LEX_STATE_NONE = 0,
-    YP_LEX_STATE_BEG = (1 << YP_LEX_STATE_BIT_BEG),
-    YP_LEX_STATE_END = (1 << YP_LEX_STATE_BIT_END),
-    YP_LEX_STATE_ENDARG = (1 << YP_LEX_STATE_BIT_ENDARG),
-    YP_LEX_STATE_ENDFN = (1 << YP_LEX_STATE_BIT_ENDFN),
-    YP_LEX_STATE_ARG = (1 << YP_LEX_STATE_BIT_ARG),
-    YP_LEX_STATE_CMDARG = (1 << YP_LEX_STATE_BIT_CMDARG),
-    YP_LEX_STATE_MID = (1 << YP_LEX_STATE_BIT_MID),
-    YP_LEX_STATE_FNAME = (1 << YP_LEX_STATE_BIT_FNAME),
-    YP_LEX_STATE_DOT = (1 << YP_LEX_STATE_BIT_DOT),
-    YP_LEX_STATE_CLASS = (1 << YP_LEX_STATE_BIT_CLASS),
-    YP_LEX_STATE_LABEL = (1 << YP_LEX_STATE_BIT_LABEL),
-    YP_LEX_STATE_LABELED = (1 << YP_LEX_STATE_BIT_LABELED),
-    YP_LEX_STATE_FITEM = (1 << YP_LEX_STATE_BIT_FITEM),
-    YP_LEX_STATE_BEG_ANY = YP_LEX_STATE_BEG | YP_LEX_STATE_MID | YP_LEX_STATE_CLASS,
-    YP_LEX_STATE_ARG_ANY = YP_LEX_STATE_ARG | YP_LEX_STATE_CMDARG,
-    YP_LEX_STATE_END_ANY = YP_LEX_STATE_END | YP_LEX_STATE_ENDARG | YP_LEX_STATE_ENDFN
-} yp_lex_state_t;
+    PM_LEX_STATE_NONE = 0,
+    PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
+    PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
+    PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
+    PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
+    PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
+    PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
+    PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
+    PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
+    PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
+    PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
+    PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
+    PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
+    PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
+    PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
+    PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
+    PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
+} pm_lex_state_t;
 typedef enum {
-    YP_HEREDOC_QUOTE_NONE,
-    YP_HEREDOC_QUOTE_SINGLE = '\'',
-    YP_HEREDOC_QUOTE_DOUBLE = '"',
-    YP_HEREDOC_QUOTE_BACKTICK = '`',
-} yp_heredoc_quote_t;
+    PM_HEREDOC_QUOTE_NONE,
+    PM_HEREDOC_QUOTE_SINGLE = '\'',
+    PM_HEREDOC_QUOTE_DOUBLE = '"',
+    PM_HEREDOC_QUOTE_BACKTICK = '`',
+} pm_heredoc_quote_t;
 typedef enum {
-    YP_HEREDOC_INDENT_NONE,
-    YP_HEREDOC_INDENT_DASH,
-    YP_HEREDOC_INDENT_TILDE,
-} yp_heredoc_indent_t;
+    PM_HEREDOC_INDENT_NONE,
+    PM_HEREDOC_INDENT_DASH,
+    PM_HEREDOC_INDENT_TILDE,
+} pm_heredoc_indent_t;
 // When lexing Ruby source, the lexer has a small amount of state to tell which
 // kind of token it is currently lexing. For example, when we find the start of
 // a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
-// that the lexer is now in the YP_LEX_STRING mode, and will return tokens that
+// that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
 // are found as part of a string.
-typedef struct yp_lex_mode {
+typedef struct pm_lex_mode {
     enum {
         // This state is used when any given token is being lexed.
-        YP_LEX_DEFAULT,
+        PM_LEX_DEFAULT,
         // This state is used when we're lexing as normal but inside an embedded
         // expression of a string.
-        YP_LEX_EMBEXPR,
+        PM_LEX_EMBEXPR,
         // This state is used when we're lexing a variable that is embedded
         // directly inside of a string with the # shorthand.
-        YP_LEX_EMBVAR,
+        PM_LEX_EMBVAR,
         // This state is used when you are inside the content of a heredoc.
-        YP_LEX_HEREDOC,
+        PM_LEX_HEREDOC,
         // This state is used when we are lexing a list of tokens, as in a %w
         // word list literal or a %i symbol list literal.
-        YP_LEX_LIST,
+        PM_LEX_LIST,
         // This state is used when a regular expression has been begun and we
         // are looking for the terminator.
-        YP_LEX_REGEXP,
+        PM_LEX_REGEXP,
         // This state is used when we are lexing a string or a string-like
         // token, as in string content with either quote or an xstring.
-        YP_LEX_STRING
+        PM_LEX_STRING
     } mode;
     union {
@@ -166,8 +166,8 @@ typedef struct yp_lex_mode {
             const uint8_t *ident_start;
             size_t ident_length;
-            yp_heredoc_quote_t quote;
-            yp_heredoc_indent_t indent;
+            pm_heredoc_quote_t quote;
+            pm_heredoc_indent_t indent;
             // This is the pointer to the character where lexing should resume
             // once the heredoc has been completely processed.
@@ -176,83 +176,83 @@ typedef struct yp_lex_mode {
     } as;
     // The previous lex state so that it knows how to pop.
-    struct yp_lex_mode *prev;
-} yp_lex_mode_t;
+    struct pm_lex_mode *prev;
+} pm_lex_mode_t;
 // We pre-allocate a certain number of lex states in order to avoid having to
 // call malloc too many times while parsing. You really shouldn't need more than
 // this because you only really nest deeply when doing string interpolation.
-#define YP_LEX_STACK_SIZE 4
+#define PM_LEX_STACK_SIZE 4
 // A forward declaration since our error handler struct accepts a parser for
 // each of its function calls.
-typedef struct yp_parser yp_parser_t;
+typedef struct pm_parser pm_parser_t;
 // While parsing, we keep track of a stack of contexts. This is helpful for
 // error recovery so that we can pop back to a previous context when we hit a
 // token that is understood by a parent context but not by the current context.
 typedef enum {
-    YP_CONTEXT_BEGIN,          // a begin statement
-    YP_CONTEXT_BLOCK_BRACES,   // expressions in block arguments using braces
-    YP_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
-    YP_CONTEXT_CASE_WHEN,      // a case when statements
-    YP_CONTEXT_CASE_IN,        // a case in statements
-    YP_CONTEXT_CLASS,          // a class declaration
-    YP_CONTEXT_DEF,            // a method definition
-    YP_CONTEXT_DEF_PARAMS,     // a method definition's parameters
-    YP_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
-    YP_CONTEXT_ELSE,           // an else clause
-    YP_CONTEXT_ELSIF,          // an elsif clause
-    YP_CONTEXT_EMBEXPR,        // an interpolated expression
-    YP_CONTEXT_ENSURE,         // an ensure statement
-    YP_CONTEXT_FOR,            // a for loop
-    YP_CONTEXT_IF,             // an if statement
-    YP_CONTEXT_LAMBDA_BRACES,  // a lambda expression with braces
-    YP_CONTEXT_LAMBDA_DO_END,  // a lambda expression with do..end
-    YP_CONTEXT_MAIN,           // the top level context
-    YP_CONTEXT_MODULE,         // a module declaration
-    YP_CONTEXT_PARENS,         // a parenthesized expression
-    YP_CONTEXT_POSTEXE,        // an END block
-    YP_CONTEXT_PREDICATE,      // a predicate inside an if/elsif/unless statement
-    YP_CONTEXT_PREEXE,         // a BEGIN block
-    YP_CONTEXT_RESCUE_ELSE,    // a rescue else statement
-    YP_CONTEXT_RESCUE,         // a rescue statement
-    YP_CONTEXT_SCLASS,         // a singleton class definition
-    YP_CONTEXT_UNLESS,         // an unless statement
-    YP_CONTEXT_UNTIL,          // an until statement
-    YP_CONTEXT_WHILE,          // a while statement
-} yp_context_t;
+    PM_CONTEXT_BEGIN,          // a begin statement
+    PM_CONTEXT_BLOCK_BRACES,   // expressions in block arguments using braces
+    PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
+    PM_CONTEXT_CASE_WHEN,      // a case when statement
+    PM_CONTEXT_CASE_IN,        // a case in statement
+    PM_CONTEXT_CLASS,          // a class declaration
+    PM_CONTEXT_DEF,            // a method definition
+    PM_CONTEXT_DEF_PARAMS,     // a method definition's parameters
+    PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
+    PM_CONTEXT_ELSE,           // an else clause
+    PM_CONTEXT_ELSIF,          // an elsif clause
+    PM_CONTEXT_EMBEXPR,        // an interpolated expression
+    PM_CONTEXT_ENSURE,         // an ensure statement
+    PM_CONTEXT_FOR,            // a for loop
+    PM_CONTEXT_IF,             // an if statement
+    PM_CONTEXT_LAMBDA_BRACES,  // a lambda expression with braces
+    PM_CONTEXT_LAMBDA_DO_END,  // a lambda expression with do..end
+    PM_CONTEXT_MAIN,           // the top level context
+    PM_CONTEXT_MODULE,         // a module declaration
+    PM_CONTEXT_PARENS,         // a parenthesized expression
+    PM_CONTEXT_POSTEXE,        // an END block
+    PM_CONTEXT_PREDICATE,      // a predicate inside an if/elsif/unless statement
+    PM_CONTEXT_PREEXE,         // a BEGIN block
+    PM_CONTEXT_RESCUE_ELSE,    // a rescue else statement
+    PM_CONTEXT_RESCUE,         // a rescue statement
+    PM_CONTEXT_SCLASS,         // a singleton class definition
+    PM_CONTEXT_UNLESS,         // an unless statement
+    PM_CONTEXT_UNTIL,          // an until statement
+    PM_CONTEXT_WHILE,          // a while statement
+} pm_context_t;
 // This is a node in a linked list of contexts.
-typedef struct yp_context_node {
-    yp_context_t context;
-    struct yp_context_node *prev;
-} yp_context_node_t;
+typedef struct pm_context_node {
+    pm_context_t context;
+    struct pm_context_node *prev;
+} pm_context_node_t;
 // This is the type of a comment that we've found while parsing.
 typedef enum {
-    YP_COMMENT_INLINE,
-    YP_COMMENT_EMBDOC,
-    YP_COMMENT___END__
-} yp_comment_type_t;
+    PM_COMMENT_INLINE,
+    PM_COMMENT_EMBDOC,
+    PM_COMMENT___END__
+} pm_comment_type_t;
 // This is a node in the linked list of comments that we've found while parsing.
-typedef struct yp_comment {
-    yp_list_node_t node;
+typedef struct pm_comment {
+    pm_list_node_t node;
     const uint8_t *start;
     const uint8_t *end;
-    yp_comment_type_t type;
-} yp_comment_t;
+    pm_comment_type_t type;
+} pm_comment_t;
-// When the encoding that is being used to parse the source is changed by YARP,
+// When the encoding that is being used to parse the source is changed by prism,
 // we provide the ability here to call out to a user-defined function.
-typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
+typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
-// When an encoding is encountered that isn't understood by YARP, we provide
+// When an encoding is encountered that isn't understood by prism, we provide
 // the ability here to call out to a user-defined function to get an encoding
 // struct. If the function returns something that isn't NULL, we set that to
 // our encoding and use it to parse identifiers.
-typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
+typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
 // When you are lexing through a file, the lexer needs all of the information
 // that the parser additionally provides (for example, the local table). So if
@@ -268,17 +268,17 @@ typedef struct {
     // This is the callback that is called when a token is lexed. It is passed
     // the opaque data pointer, the parser, and the token that was lexed.
-    void (*callback)(void *data, yp_parser_t *parser, yp_token_t *token);
-} yp_lex_callback_t;
+    void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
+} pm_lex_callback_t;
 // This struct represents a node in a linked list of scopes. Some scopes can see
 // into their parent scopes, while others cannot.
-typedef struct yp_scope {
+typedef struct pm_scope {
     // The IDs of the locals in the given scope.
-    yp_constant_id_list_t locals;
+    pm_constant_id_list_t locals;
     // A pointer to the previous scope in the linked list.
-    struct yp_scope *previous;
+    struct pm_scope *previous;
     // A boolean indicating whether or not this scope can see into its parent.
     // If closed is true, then the scope cannot see into its parent.
@@ -293,14 +293,14 @@ typedef struct yp_scope {
     // This is necessary to determine if child blocks are allowed to use
     // numbered parameters.
     bool numbered_params;
-} yp_scope_t;
+} pm_scope_t;
 // This struct represents the overall parser. It contains a reference to the
 // source file, as well as pointers that indicate where in the source it's
 // currently parsing. It also contains the most recent and current token that
 // it's considering.
-struct yp_parser {
-    yp_lex_state_t lex_state; // the current state of the lexer
+struct pm_parser {
+    pm_lex_state_t lex_state; // the current state of the lexer
     int enclosure_nesting;    // tracks the current nesting of (), [], and {}
     // Used to temporarily track the nesting of enclosures to determine if a {
@@ -313,22 +313,22 @@ struct yp_parser {
     // the stack used to determine if a do keyword belongs to the predicate of a
     // while, until, or for loop
-    yp_state_stack_t do_loop_stack;
+    pm_state_stack_t do_loop_stack;
     // the stack used to determine if a do keyword belongs to the beginning of a
     // block
-    yp_state_stack_t accepts_block_stack;
+    pm_state_stack_t accepts_block_stack;
     struct {
-        yp_lex_mode_t *current;                 // the current mode of the lexer
-        yp_lex_mode_t stack[YP_LEX_STACK_SIZE]; // the stack of lexer modes
+        pm_lex_mode_t *current;                 // the current mode of the lexer
+        pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
         size_t index;                           // the current index into the lexer mode stack
     } lex_modes;
     const uint8_t *start;   // the pointer to the start of the source
     const uint8_t *end;     // the pointer to the end of the source
-    yp_token_t previous; // the previous token we were considering
-    yp_token_t current;  // the current token we're considering
+    pm_token_t previous; // the previous token we were considering
+    pm_token_t current;  // the current token we're considering
     // This is a special field set on the parser when we need the parser to jump
     // to a specific location when lexing the next token, as opposed to just
@@ -341,26 +341,27 @@ struct yp_parser {
     // found on a line then this is NULL.
     const uint8_t *heredoc_end;
-    yp_list_t comment_list;             // the list of comments that have been found while parsing
-    yp_list_t warning_list;             // the list of warnings that have been found while parsing
-    yp_list_t error_list;               // the list of errors that have been found while parsing
-    yp_scope_t *current_scope;          // the current local scope
+    pm_list_t comment_list;             // the list of comments that have been found while parsing
+    pm_list_t warning_list;             // the list of warnings that have been found while parsing
+    pm_list_t error_list;               // the list of errors that have been found while parsing
+    pm_scope_t *current_scope;          // the current local scope
-    yp_context_node_t *current_context; // the current parsing context
+    pm_context_node_t *current_context; // the current parsing context
     // The encoding functions for the current file are attached to the parser as
     // it's parsing so that it can change with a magic comment.
-    yp_encoding_t encoding;
+    pm_encoding_t encoding;
     // When the encoding that is being used to parse the source is changed by
-    // YARP, we provide the ability here to call out to a user-defined function.
-    yp_encoding_changed_callback_t encoding_changed_callback;
+    // prism, we provide the ability here to call out to a user-defined
+    // function.
+    pm_encoding_changed_callback_t encoding_changed_callback;
-    // When an encoding is encountered that isn't understood by YARP, we provide
-    // the ability here to call out to a user-defined function to get an
+    // When an encoding is encountered that isn't understood by prism, we
+    // provide the ability here to call out to a user-defined function to get an
     // encoding struct. If the function returns something that isn't NULL, we
     // set that to our encoding and use it to parse identifiers.
-    yp_encoding_decode_callback_t encoding_decode_callback;
+    pm_encoding_decode_callback_t encoding_decode_callback;
     // This pointer indicates where a comment must start if it is to be
     // considered an encoding comment.
@@ -368,24 +369,24 @@ struct yp_parser {
     // This is an optional callback that can be attached to the parser that will
     // be called whenever a new token is lexed by the parser.
-    yp_lex_callback_t *lex_callback;
+    pm_lex_callback_t *lex_callback;
     // This is the path of the file being parsed
     // We use the filepath when constructing SourceFileNodes
-    yp_string_t filepath_string;
+    pm_string_t filepath_string;
     // This constant pool keeps all of the constants defined throughout the file
     // so that we can reference them later.
-    yp_constant_pool_t constant_pool;
+    pm_constant_pool_t constant_pool;
     // This is the list of newline offsets in the source file.
-    yp_newline_list_t newline_list;
+    pm_newline_list_t newline_list;
     // We want to add a flag to integer nodes that indicates their base. We only
     // want to parse these once, but we don't have space on the token itself to
     // communicate this information. So we store it here and pass it through
     // when we find tokens that we need it for.
-    yp_node_flags_t integer_base;
+    pm_node_flags_t integer_base;
     // Whether or not we're at the beginning of a command
     bool command_start;
@@ -414,4 +415,4 @@ struct yp_parser {
     bool frozen_string_literal;
 };
-#endif // YARP_PARSER_H
+#endif // PRISM_PARSER_H

data/include/prism/regexp.h ADDED Viewed

@@ -0,0 +1,19 @@
+#ifndef PRISM_REGEXP_H
+#define PRISM_REGEXP_H
+#include "prism/defines.h"
+#include "prism/parser.h"
+#include "prism/enc/pm_encoding.h"
+#include "prism/util/pm_memchr.h"
+#include "prism/util/pm_string_list.h"
+#include "prism/util/pm_string.h"
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+// Parse a regular expression and extract the names of all of the named capture
+// groups.
+PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
+#endif

data/include/prism/unescape.h ADDED Viewed

@@ -0,0 +1,48 @@
+#ifndef PRISM_UNESCAPE_H
+#define PRISM_UNESCAPE_H
+#include "prism/defines.h"
+#include "prism/diagnostic.h"
+#include "prism/parser.h"
+#include "prism/util/pm_char.h"
+#include "prism/util/pm_list.h"
+#include "prism/util/pm_memchr.h"
+#include "prism/util/pm_string.h"
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+// The type of unescape we are performing.
+typedef enum {
+    // When we're creating a string inside of a list literal like %w, we
+    // shouldn't unescape anything.
+    PM_UNESCAPE_NONE,
+    // When we're unescaping a single-quoted string, we only need to unescape
+    // single quotes and backslashes.
+    PM_UNESCAPE_MINIMAL,
+    // When we're unescaping a string list, in addition to MINIMAL, we need to
+    // unescape whitespace.
+    PM_UNESCAPE_WHITESPACE,
+    // When we're unescaping a double-quoted string, we need to unescape all
+    // escapes.
+    PM_UNESCAPE_ALL,
+} pm_unescape_type_t;
+// Unescape the contents of the given token into the given string using the given unescape mode.
+PRISM_EXPORTED_FUNCTION void pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
+void pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
+// Accepts a source string and a type of unescaping and returns the unescaped version.
+// The caller must call pm_string_free(result) after calling this function.
+PRISM_EXPORTED_FUNCTION bool pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result);
+// Returns the number of bytes that encompass the first escape sequence in the
+// given string.
+size_t pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *value, pm_unescape_type_t unescape_type, bool expect_single_codepoint);
+#endif

data/include/prism/util/pm_buffer.h ADDED Viewed

@@ -0,0 +1,51 @@
+#ifndef PRISM_BUFFER_H
+#define PRISM_BUFFER_H
+#include "prism/defines.h"
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+// A pm_buffer_t is a simple memory buffer that stores data in a contiguous
+// block of memory. It is used to store the serialized representation of a
+// prism tree.
+typedef struct {
+    char *value;
+    size_t length;
+    size_t capacity;
+} pm_buffer_t;
+// Return the size of the pm_buffer_t struct.
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
+// Initialize a pm_buffer_t with its default values.
+PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
+// Return the value of the buffer.
+PRISM_EXPORTED_FUNCTION char * pm_buffer_value(pm_buffer_t *buffer);
+// Return the length of the buffer.
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(pm_buffer_t *buffer);
+// Append the given amount of space as zeroes to the buffer.
+void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
+// Append a string to the buffer.
+void pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length);
+// Append a list of bytes to the buffer.
+void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
+// Append a single byte to the buffer.
+void pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value);
+// Append a 32-bit unsigned integer to the buffer.
+void pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value);
+// Free the memory associated with the buffer.
+PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
+#endif