RubyGems - jruby-prism-parser - Versions diffs - 0.23.0.pre.SNAPSHOT-java - Mend

jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +401 -0
data/CODE_OF_CONDUCT.md +76 -0
data/CONTRIBUTING.md +62 -0
data/LICENSE.md +7 -0
data/Makefile +101 -0
data/README.md +98 -0
data/config.yml +2902 -0
data/docs/build_system.md +91 -0
data/docs/configuration.md +64 -0
data/docs/cruby_compilation.md +27 -0
data/docs/design.md +53 -0
data/docs/encoding.md +121 -0
data/docs/fuzzing.md +88 -0
data/docs/heredocs.md +36 -0
data/docs/javascript.md +118 -0
data/docs/local_variable_depth.md +229 -0
data/docs/mapping.md +117 -0
data/docs/parser_translation.md +34 -0
data/docs/parsing_rules.md +19 -0
data/docs/releasing.md +98 -0
data/docs/ripper.md +36 -0
data/docs/ruby_api.md +43 -0
data/docs/ruby_parser_translation.md +19 -0
data/docs/serialization.md +209 -0
data/docs/testing.md +55 -0
data/ext/prism/api_node.c +5098 -0
data/ext/prism/api_pack.c +267 -0
data/ext/prism/extconf.rb +110 -0
data/ext/prism/extension.c +1155 -0
data/ext/prism/extension.h +18 -0
data/include/prism/ast.h +5807 -0
data/include/prism/defines.h +102 -0
data/include/prism/diagnostic.h +339 -0
data/include/prism/encoding.h +265 -0
data/include/prism/node.h +57 -0
data/include/prism/options.h +230 -0
data/include/prism/pack.h +152 -0
data/include/prism/parser.h +732 -0
data/include/prism/prettyprint.h +26 -0
data/include/prism/regexp.h +33 -0
data/include/prism/util/pm_buffer.h +155 -0
data/include/prism/util/pm_char.h +205 -0
data/include/prism/util/pm_constant_pool.h +209 -0
data/include/prism/util/pm_list.h +97 -0
data/include/prism/util/pm_memchr.h +29 -0
data/include/prism/util/pm_newline_list.h +93 -0
data/include/prism/util/pm_state_stack.h +42 -0
data/include/prism/util/pm_string.h +150 -0
data/include/prism/util/pm_string_list.h +44 -0
data/include/prism/util/pm_strncasecmp.h +32 -0
data/include/prism/util/pm_strpbrk.h +46 -0
data/include/prism/version.h +29 -0
data/include/prism.h +289 -0
data/jruby-prism.jar +0 -0
data/lib/prism/compiler.rb +486 -0
data/lib/prism/debug.rb +206 -0
data/lib/prism/desugar_compiler.rb +207 -0
data/lib/prism/dispatcher.rb +2150 -0
data/lib/prism/dot_visitor.rb +4634 -0
data/lib/prism/dsl.rb +785 -0
data/lib/prism/ffi.rb +346 -0
data/lib/prism/lex_compat.rb +908 -0
data/lib/prism/mutation_compiler.rb +753 -0
data/lib/prism/node.rb +17864 -0
data/lib/prism/node_ext.rb +212 -0
data/lib/prism/node_inspector.rb +68 -0
data/lib/prism/pack.rb +224 -0
data/lib/prism/parse_result/comments.rb +177 -0
data/lib/prism/parse_result/newlines.rb +64 -0
data/lib/prism/parse_result.rb +498 -0
data/lib/prism/pattern.rb +250 -0
data/lib/prism/serialize.rb +1354 -0
data/lib/prism/translation/parser/compiler.rb +1838 -0
data/lib/prism/translation/parser/lexer.rb +335 -0
data/lib/prism/translation/parser/rubocop.rb +37 -0
data/lib/prism/translation/parser.rb +178 -0
data/lib/prism/translation/ripper.rb +577 -0
data/lib/prism/translation/ruby_parser.rb +1521 -0
data/lib/prism/translation.rb +11 -0
data/lib/prism/version.rb +3 -0
data/lib/prism/visitor.rb +495 -0
data/lib/prism.rb +99 -0
data/prism.gemspec +135 -0
data/rbi/prism.rbi +7767 -0
data/rbi/prism_static.rbi +207 -0
data/sig/prism.rbs +4773 -0
data/sig/prism_static.rbs +201 -0
data/src/diagnostic.c +400 -0
data/src/encoding.c +5132 -0
data/src/node.c +2786 -0
data/src/options.c +213 -0
data/src/pack.c +493 -0
data/src/prettyprint.c +8881 -0
data/src/prism.c +18406 -0
data/src/regexp.c +638 -0
data/src/serialize.c +1554 -0
data/src/token_type.c +700 -0
data/src/util/pm_buffer.c +190 -0
data/src/util/pm_char.c +318 -0
data/src/util/pm_constant_pool.c +322 -0
data/src/util/pm_list.c +49 -0
data/src/util/pm_memchr.c +35 -0
data/src/util/pm_newline_list.c +84 -0
data/src/util/pm_state_stack.c +25 -0
data/src/util/pm_string.c +203 -0
data/src/util/pm_string_list.c +28 -0
data/src/util/pm_strncasecmp.c +24 -0
data/src/util/pm_strpbrk.c +180 -0
metadata +156 -0

data/include/prism/defines.h ADDED Viewed

@@ -0,0 +1,102 @@
+/**
+ * @file defines.h
+ *
+ * Macro definitions used throughout the prism library.
+ *
+ * This file should be included first by any *.h or *.c in prism for consistency
+ * and to ensure that the macros are defined before they are used.
+ */
+#ifndef PRISM_DEFINES_H
+#define PRISM_DEFINES_H
+#include <ctype.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+/**
+ * We want to be able to use the PRI* macros for printing out integers, but on
+ * some platforms <inttypes.h> only provides them when __STDC_FORMAT_MACROS is
+ * defined before that header is included. The guard avoids redefining the
+ * macro when the build system or an earlier header has already defined it.
+ */
+#ifndef __STDC_FORMAT_MACROS
+#   define __STDC_FORMAT_MACROS
+#endif
+#include <inttypes.h>
+/**
+ * By default, we compile with -fvisibility=hidden, so any function that forms
+ * part of the public API has to be marked as publicly visible. This macro
+ * expands to the right annotation for the current platform, or to nothing when
+ * PRISM_EXPORT_SYMBOLS is not defined. A definition that already exists when
+ * this header is included is left untouched.
+ */
+#if !defined(PRISM_EXPORTED_FUNCTION)
+#   if !defined(PRISM_EXPORT_SYMBOLS)
+#       define PRISM_EXPORTED_FUNCTION
+#   elif defined(_WIN32)
+#       define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
+#   else
+#       define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
+#   endif
+#endif
+/**
+ * Certain compilers support specifying that a function accepts variadic
+ * parameters that look like printf format strings, which lets them check the
+ * arguments at each call site. This macro does that in a compiler-agnostic
+ * way and expands to nothing on compilers without the attribute.
+ *
+ * The attribute and archetype are spelled with leading and trailing double
+ * underscores so that the expansion is not affected if the including code
+ * defines `format` or `printf` as macros.
+ *
+ * @param string_index The 1-based position of the format string parameter.
+ * @param argument_index The 1-based position of the first variadic argument.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
+#else
+#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
+#endif
+/**
+ * GCC will warn about a function or parameter that is declared but never
+ * used. This macro marks a function or parameter as intentionally unused in a
+ * compiler-agnostic way; it expands to nothing when __GNUC__ is not defined.
+ */
+#ifndef __GNUC__
+#   define PRISM_ATTRIBUTE_UNUSED
+#else
+#   define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
+#endif
+/**
+ * Old Visual Studio versions do not support the inline keyword, so when
+ * building with MSVC we map it to __inline unless inline has already been
+ * defined as a macro.
+ */
+#ifdef _MSC_VER
+#   ifndef inline
+#       define inline __inline
+#   endif
+#endif
+/**
+ * Visual Studio versions before 2015 (_MSC_VER < 1900) do not implement
+ * snprintf, but instead implement _snprintf. We standardize on the snprintf
+ * name here, unless snprintf has already been defined as a macro.
+ */
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#   ifndef snprintf
+#       define snprintf _snprintf
+#   endif
+#endif
+/**
+ * A simple utility macro to concatenate two tokens together. The arguments are
+ * fully macro-expanded before being pasted (which is why the pasting happens
+ * in a second, inner macro), so this also works when one of the tokens is
+ * itself a macro such as __LINE__.
+ */
+#define PM_CONCATENATE_EXPANDED(left, right) left ## right
+#define PM_CONCATENATE(left, right) PM_CONCATENATE_EXPANDED(left, right)
+/**
+ * We want to be able to use static assertions, but they weren't standardized
+ * until C11. When compiling as C11 or later we use the _Static_assert keyword
+ * directly; otherwise we polyfill it with a hacky typedef that fails to compile
+ * due to a negative array size if the condition is false.
+ *
+ * _Static_assert is a keyword rather than a macro, so it cannot be detected
+ * with defined(); the language version is checked instead.
+ *
+ * @param line A token that makes the fallback typedef name unique (for
+ *     example __LINE__). It is unused when _Static_assert is available.
+ * @param condition The integer constant expression that must be true.
+ * @param message The string literal reported by _Static_assert on failure. It
+ *     is unused by the fallback.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
+#else
+#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
+#endif
+#endif

data/include/prism/diagnostic.h ADDED Viewed

@@ -0,0 +1,339 @@
+/**
+ * @file diagnostic.h
+ *
+ * A list of diagnostics generated during parsing.
+ */
+#ifndef PRISM_DIAGNOSTIC_H
+#define PRISM_DIAGNOSTIC_H
+#include "prism/ast.h"
+#include "prism/defines.h"
+#include "prism/util/pm_list.h"
+#include <stdbool.h>
+#include <stdlib.h>
+#include <assert.h>
+/**
+ * The levels of errors generated during parsing. These are among the possible
+ * values of the `level` field of `pm_diagnostic_t`. The numeric values are
+ * assigned explicitly.
+ */
+typedef enum {
+    /** For errors that cannot be recovered from. */
+    PM_ERROR_LEVEL_FATAL = 0,
+    /** For errors that should raise an argument error. */
+    PM_ERROR_LEVEL_ARGUMENT = 1
+} pm_error_level_t;
+/**
+ * The levels of warnings generated during parsing. These are among the
+ * possible values of the `level` field of `pm_diagnostic_t`. The numeric values
+ * are assigned explicitly.
+ */
+typedef enum {
+    /** For warnings which should be emitted if $VERBOSE != nil. */
+    PM_WARNING_LEVEL_DEFAULT = 0,
+    /** For warnings which should be emitted if $VERBOSE == true. */
+    PM_WARNING_LEVEL_VERBOSE = 1
+} pm_warning_level_t;
+/**
+ * This struct represents a single diagnostic (an error or a warning) generated
+ * during parsing.
+ *
+ * @extends pm_list_node_t
+ */
+typedef struct {
+    /** The embedded base node. It is the first member of the struct. */
+    pm_list_node_t node;
+    /** The location of the diagnostic in the source. */
+    pm_location_t location;
+    /** The message associated with the diagnostic. */
+    const char *message;
+    /**
+     * Whether or not the memory related to the message of this diagnostic is
+     * owned by this diagnostic. If it is, it needs to be freed when the
+     * diagnostic is freed.
+     */
+    bool owned;
+    /**
+     * The level of the diagnostic, see `pm_error_level_t` and
+     * `pm_warning_level_t` for possible values. It is stored as a uint8_t
+     * rather than as either enum type, and both enums start at 0, so the value
+     * alone does not say which of the two enums it came from.
+     */
+    uint8_t level;
+} pm_diagnostic_t;
+/**
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
+ * of errors between the parser and the user.
+ *
+ * No enumerator has an explicit value, so they are numbered consecutively from
+ * 0 in declaration order. Inserting, removing, or reordering entries therefore
+ * changes the numeric value of every later ID.
+ */
+typedef enum {
+    // A generic placeholder error that may later be replaced by a more specific
+    // one. For an example of how this is used, see parse_expression_prefix.
+    PM_ERR_CANNOT_PARSE_EXPRESSION,
+    // These are the error codes (PM_ERR_*).
+    PM_ERR_ALIAS_ARGUMENT,
+    PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
+    PM_ERR_ARGUMENT_AFTER_BLOCK,
+    PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
+    PM_ERR_ARGUMENT_BARE_HASH,
+    PM_ERR_ARGUMENT_BLOCK_FORWARDING,
+    PM_ERR_ARGUMENT_BLOCK_MULTI,
+    PM_ERR_ARGUMENT_FORMAL_CLASS,
+    PM_ERR_ARGUMENT_FORMAL_CONSTANT,
+    PM_ERR_ARGUMENT_FORMAL_GLOBAL,
+    PM_ERR_ARGUMENT_FORMAL_IVAR,
+    PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
+    PM_ERR_ARGUMENT_IN,
+    PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
+    PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+    PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
+    PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
+    PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
+    PM_ERR_ARGUMENT_TERM_PAREN,
+    PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
+    PM_ERR_ARRAY_ELEMENT,
+    PM_ERR_ARRAY_EXPRESSION,
+    PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
+    PM_ERR_ARRAY_SEPARATOR,
+    PM_ERR_ARRAY_TERM,
+    PM_ERR_BEGIN_LONELY_ELSE,
+    PM_ERR_BEGIN_TERM,
+    PM_ERR_BEGIN_UPCASE_BRACE,
+    PM_ERR_BEGIN_UPCASE_TERM,
+    PM_ERR_BEGIN_UPCASE_TOPLEVEL,
+    PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
+    PM_ERR_BLOCK_PARAM_PIPE_TERM,
+    PM_ERR_BLOCK_TERM_BRACE,
+    PM_ERR_BLOCK_TERM_END,
+    PM_ERR_CANNOT_PARSE_STRING_PART,
+    PM_ERR_CASE_EXPRESSION_AFTER_CASE,
+    PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
+    PM_ERR_CASE_MATCH_MISSING_PREDICATE,
+    PM_ERR_CASE_MISSING_CONDITIONS,
+    PM_ERR_CASE_TERM,
+    PM_ERR_CLASS_IN_METHOD,
+    PM_ERR_CLASS_NAME,
+    PM_ERR_CLASS_SUPERCLASS,
+    PM_ERR_CLASS_TERM,
+    PM_ERR_CLASS_UNEXPECTED_END,
+    PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
+    PM_ERR_CONDITIONAL_IF_PREDICATE,
+    PM_ERR_CONDITIONAL_PREDICATE_TERM,
+    PM_ERR_CONDITIONAL_TERM,
+    PM_ERR_CONDITIONAL_TERM_ELSE,
+    PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
+    PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
+    PM_ERR_CONDITIONAL_WHILE_PREDICATE,
+    PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
+    PM_ERR_DEF_ENDLESS,
+    PM_ERR_DEF_ENDLESS_SETTER,
+    PM_ERR_DEF_NAME,
+    PM_ERR_DEF_NAME_AFTER_RECEIVER,
+    PM_ERR_DEF_PARAMS_TERM,
+    PM_ERR_DEF_PARAMS_TERM_PAREN,
+    PM_ERR_DEF_RECEIVER,
+    PM_ERR_DEF_RECEIVER_TERM,
+    PM_ERR_DEF_TERM,
+    PM_ERR_DEFINED_EXPRESSION,
+    PM_ERR_EMBDOC_TERM,
+    PM_ERR_EMBEXPR_END,
+    PM_ERR_EMBVAR_INVALID,
+    PM_ERR_END_UPCASE_BRACE,
+    PM_ERR_END_UPCASE_TERM,
+    PM_ERR_ESCAPE_INVALID_CONTROL,
+    PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
+    PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
+    PM_ERR_ESCAPE_INVALID_META,
+    PM_ERR_ESCAPE_INVALID_META_REPEAT,
+    PM_ERR_ESCAPE_INVALID_UNICODE,
+    PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
+    PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
+    PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
+    PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
+    PM_ERR_EXPECT_ARGUMENT,
+    PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
+    PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
+    PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
+    PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
+    PM_ERR_EXPECT_RBRACKET,
+    PM_ERR_EXPECT_RPAREN,
+    PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
+    PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
+    PM_ERR_EXPECT_STRING_CONTENT,
+    PM_ERR_EXPECT_WHEN_DELIMITER,
+    PM_ERR_EXPRESSION_BARE_HASH,
+    PM_ERR_FOR_COLLECTION,
+    PM_ERR_FOR_IN,
+    PM_ERR_FOR_INDEX,
+    PM_ERR_FOR_TERM,
+    PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
+    PM_ERR_HASH_KEY,
+    PM_ERR_HASH_ROCKET,
+    PM_ERR_HASH_TERM,
+    PM_ERR_HASH_VALUE,
+    PM_ERR_HEREDOC_TERM,
+    PM_ERR_INCOMPLETE_QUESTION_MARK,
+    PM_ERR_INCOMPLETE_VARIABLE_CLASS,
+    PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
+    PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
+    PM_ERR_INVALID_FLOAT_EXPONENT,
+    PM_ERR_INVALID_NUMBER_BINARY,
+    PM_ERR_INVALID_NUMBER_DECIMAL,
+    PM_ERR_INVALID_NUMBER_HEXADECIMAL,
+    PM_ERR_INVALID_NUMBER_OCTAL,
+    PM_ERR_INVALID_NUMBER_UNDERSCORE,
+    PM_ERR_INVALID_CHARACTER,
+    PM_ERR_INVALID_MULTIBYTE_CHARACTER,
+    PM_ERR_INVALID_PRINTABLE_CHARACTER,
+    PM_ERR_INVALID_PERCENT,
+    PM_ERR_INVALID_VARIABLE_GLOBAL,
+    PM_ERR_IT_NOT_ALLOWED,
+    PM_ERR_LAMBDA_OPEN,
+    PM_ERR_LAMBDA_TERM_BRACE,
+    PM_ERR_LAMBDA_TERM_END,
+    PM_ERR_LIST_I_LOWER_ELEMENT,
+    PM_ERR_LIST_I_LOWER_TERM,
+    PM_ERR_LIST_I_UPPER_ELEMENT,
+    PM_ERR_LIST_I_UPPER_TERM,
+    PM_ERR_LIST_W_LOWER_ELEMENT,
+    PM_ERR_LIST_W_LOWER_TERM,
+    PM_ERR_LIST_W_UPPER_ELEMENT,
+    PM_ERR_LIST_W_UPPER_TERM,
+    PM_ERR_MALLOC_FAILED,
+    PM_ERR_MIXED_ENCODING,
+    PM_ERR_MODULE_IN_METHOD,
+    PM_ERR_MODULE_NAME,
+    PM_ERR_MODULE_TERM,
+    PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
+    PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
+    PM_ERR_NOT_EXPRESSION,
+    PM_ERR_NO_LOCAL_VARIABLE,
+    PM_ERR_NUMBER_LITERAL_UNDERSCORE,
+    PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
+    PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
+    PM_ERR_OPERATOR_MULTI_ASSIGN,
+    PM_ERR_OPERATOR_WRITE_ARGUMENTS,
+    PM_ERR_OPERATOR_WRITE_BLOCK,
+    PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
+    PM_ERR_PARAMETER_BLOCK_MULTI,
+    PM_ERR_PARAMETER_CIRCULAR,
+    PM_ERR_PARAMETER_METHOD_NAME,
+    PM_ERR_PARAMETER_NAME_REPEAT,
+    PM_ERR_PARAMETER_NO_DEFAULT,
+    PM_ERR_PARAMETER_NO_DEFAULT_KW,
+    PM_ERR_PARAMETER_NUMBERED_RESERVED,
+    PM_ERR_PARAMETER_ORDER,
+    PM_ERR_PARAMETER_SPLAT_MULTI,
+    PM_ERR_PARAMETER_STAR,
+    PM_ERR_PARAMETER_UNEXPECTED_FWD,
+    PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
+    PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
+    PM_ERR_PATTERN_HASH_KEY,
+    PM_ERR_PATTERN_HASH_KEY_LABEL,
+    PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
+    PM_ERR_PATTERN_LABEL_AFTER_COMMA,
+    PM_ERR_PATTERN_REST,
+    PM_ERR_PATTERN_TERM_BRACE,
+    PM_ERR_PATTERN_TERM_BRACKET,
+    PM_ERR_PATTERN_TERM_PAREN,
+    PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
+    PM_ERR_REGEXP_TERM,
+    PM_ERR_RESCUE_EXPRESSION,
+    PM_ERR_RESCUE_MODIFIER_VALUE,
+    PM_ERR_RESCUE_TERM,
+    PM_ERR_RESCUE_VARIABLE,
+    PM_ERR_RETURN_INVALID,
+    PM_ERR_SINGLETON_FOR_LITERALS,
+    PM_ERR_STATEMENT_ALIAS,
+    PM_ERR_STATEMENT_POSTEXE_END,
+    PM_ERR_STATEMENT_PREEXE_BEGIN,
+    PM_ERR_STATEMENT_UNDEF,
+    PM_ERR_STRING_CONCATENATION,
+    PM_ERR_STRING_INTERPOLATED_TERM,
+    PM_ERR_STRING_LITERAL_EOF,
+    PM_ERR_STRING_LITERAL_TERM,
+    PM_ERR_SYMBOL_INVALID,
+    PM_ERR_SYMBOL_TERM_DYNAMIC,
+    PM_ERR_SYMBOL_TERM_INTERPOLATED,
+    PM_ERR_TERNARY_COLON,
+    PM_ERR_TERNARY_EXPRESSION_FALSE,
+    PM_ERR_TERNARY_EXPRESSION_TRUE,
+    PM_ERR_UNARY_RECEIVER,
+    PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
+    PM_ERR_UNEXPECTED_TOKEN_IGNORE,
+    PM_ERR_UNDEF_ARGUMENT,
+    PM_ERR_UNTIL_TERM,
+    PM_ERR_VOID_EXPRESSION,
+    PM_ERR_WHILE_TERM,
+    PM_ERR_WRITE_TARGET_IN_METHOD,
+    PM_ERR_WRITE_TARGET_READONLY,
+    PM_ERR_WRITE_TARGET_UNEXPECTED,
+    PM_ERR_XSTRING_TERM,
+    // These are the warning codes (PM_WARN_*).
+    PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
+    PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
+    PM_WARN_AMBIGUOUS_PREFIX_STAR,
+    PM_WARN_AMBIGUOUS_SLASH,
+    PM_WARN_END_IN_METHOD,
+    // Not a diagnostic itself: as the last enumerator, its value is the number of diagnostic codes above.
+    PM_DIAGNOSTIC_ID_LEN,
+} pm_diagnostic_id_t;
+/**
+ * Append a diagnostic to the given list of diagnostics that is using shared
+ * memory for its message.
+ *
+ * NOTE(review): "shared" presumably means the message is not owned by the
+ * diagnostic (see the `owned` field of pm_diagnostic_t) - confirm against the
+ * implementation.
+ *
+ * @param list The list to append to.
+ * @param start A pointer to the start of the diagnostic's location.
+ * @param end A pointer to the end of the diagnostic's location.
+ * @param diag_id The diagnostic ID, one of the pm_diagnostic_id_t values.
+ * @return True if the diagnostic was successfully appended, false otherwise.
+ */
+bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
+/**
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ *
+ * The format string itself is not a parameter of this function, so the
+ * variadic arguments cannot be checked by the compiler at the call site.
+ * NOTE(review): presumably the format string is selected by diag_id - confirm
+ * against the implementation.
+ *
+ * @param list The list to append to.
+ * @param start A pointer to the start of the diagnostic's location.
+ * @param end A pointer to the end of the diagnostic's location.
+ * @param diag_id The diagnostic ID, one of the pm_diagnostic_id_t values.
+ * @param ... The arguments to the format string for the message.
+ * @return True if the diagnostic was successfully appended, false otherwise.
+ */
+bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
+/**
+ * Deallocate the internal state of the given diagnostic list.
+ *
+ * NOTE(review): presumably this also frees the message of every diagnostic
+ * whose `owned` flag is set - confirm against the implementation.
+ *
+ * @param list The list to deallocate.
+ */
+void pm_diagnostic_list_free(pm_list_t *list);
+#endif

data/include/prism/encoding.h ADDED Viewed

@@ -0,0 +1,265 @@
+/**
+ * @file encoding.h
+ *
+ * The encoding interface and implementations used by the parser.
+ */
+#ifndef PRISM_ENCODING_H
+#define PRISM_ENCODING_H
+#include "prism/defines.h"
+#include "prism/util/pm_strncasecmp.h"
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+/**
+ * This struct defines the functions necessary to implement the encoding
+ * interface so we can determine how many bytes the subsequent character takes.
+ * The char_width, alpha_char, and alnum_char callbacks return the number of
+ * bytes, or 0 if the next bytes are invalid for the encoding and type. The
+ * isupper_char callback returns a boolean instead.
+ */
+typedef struct {
+    /**
+     * Return the number of bytes that the next character takes if it is valid
+     * in the encoding. Does not read more than n bytes. It is assumed that n is
+     * at least 1.
+     */
+    size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
+    /**
+     * Return the number of bytes that the next character takes if it is valid
+     * in the encoding and is alphabetical. Does not read more than n bytes. It
+     * is assumed that n is at least 1.
+     */
+    size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
+    /**
+     * Return the number of bytes that the next character takes if it is valid
+     * in the encoding and is alphanumeric. Does not read more than n bytes. It
+     * is assumed that n is at least 1.
+     */
+    size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
+    /**
+     * Return true if the next character is valid in the encoding and is an
+     * uppercase character. Does not read more than n bytes. It is assumed that
+     * n is at least 1.
+     */
+    bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
+    /**
+     * The name of the encoding. This should correspond to a value that can be
+     * passed to Encoding.find in Ruby.
+     */
+    const char *name;
+    /**
+     * Whether the encoding is a multibyte encoding.
+     */
+    bool multibyte;
+} pm_encoding_t;
+/**
+ * All of the lookup tables use the first bit of each embedded byte to indicate
+ * whether the codepoint is alphabetical. The expansion is parenthesized so
+ * that the macro can sit next to any operator (for example `~` or `+`) without
+ * the shift binding to a neighbouring operand.
+ */
+#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
+/**
+ * All of the lookup tables use the second bit of each embedded byte to indicate
+ * whether the codepoint is alphanumeric. The expansion is parenthesized so
+ * that the macro can sit next to any operator (for example `~` or `+`) without
+ * the shift binding to a neighbouring operand.
+ */
+#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
+/**
+ * All of the lookup tables use the third bit of each embedded byte to indicate
+ * whether the codepoint is uppercase. The expansion is parenthesized so that
+ * the macro can sit next to any operator (for example `~` or `+`) without the
+ * shift binding to a neighbouring operand.
+ */
+#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
+/**
+ * Return the size of the next character in the UTF-8 encoding.
+ *
+ * The signature matches the char_width callback of pm_encoding_t.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ *     the encoding, or 0 if it is not.
+ */
+size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphabetical character.
+ *
+ * The signature matches the alpha_char callback of pm_encoding_t.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ *     the encoding and is alphabetical, or 0 if it is not.
+ */
+size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphanumeric character.
+ *
+ * The signature matches the alnum_char callback of pm_encoding_t.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ *     the encoding and is alphanumeric, or 0 if it is not.
+ */
+size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
+/**
+ * Return true if the next character in the UTF-8 encoding is an uppercase
+ * character.
+ *
+ * The signature matches the isupper_char callback of pm_encoding_t.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns True if the next character is valid in the encoding and is an
+ *     uppercase character, or false if it is not.
+ */
+bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
+/**
+ * This lookup table is referenced in both the UTF-8 encoding file and the
+ * parser directly in order to speed up the default encoding processing. It is
+ * used to indicate whether a character is alphabetical, alphanumeric, or
+ * uppercase in unicode mappings. It has 256 entries; each entry uses the
+ * PRISM_ENCODING_ALPHABETIC_BIT, PRISM_ENCODING_ALPHANUMERIC_BIT, and
+ * PRISM_ENCODING_UPPERCASE_BIT flags. This is only a declaration; the table is
+ * defined elsewhere.
+ */
+extern const uint8_t pm_encoding_unicode_table[256];
+/**
+ * These are all of the encodings that prism supports.
+ *
+ * PM_ENCODING_UTF_8 is explicitly 0 and every following enumerator takes the
+ * next consecutive value, so the order of this list determines each value.
+ * PM_ENCODING_MAXIMUM is last and is not an encoding itself: its value is the
+ * number of encodings, and it is used as the length of the pm_encodings table.
+ */
+typedef enum {
+    PM_ENCODING_UTF_8 = 0,
+    PM_ENCODING_ASCII_8BIT,
+    PM_ENCODING_BIG5,
+    PM_ENCODING_BIG5_HKSCS,
+    PM_ENCODING_BIG5_UAO,
+    PM_ENCODING_CESU_8,
+    PM_ENCODING_CP51932,
+    PM_ENCODING_CP850,
+    PM_ENCODING_CP852,
+    PM_ENCODING_CP855,
+    PM_ENCODING_CP949,
+    PM_ENCODING_CP950,
+    PM_ENCODING_CP951,
+    PM_ENCODING_EMACS_MULE,
+    PM_ENCODING_EUC_JP,
+    PM_ENCODING_EUC_JP_MS,
+    PM_ENCODING_EUC_JIS_2004,
+    PM_ENCODING_EUC_KR,
+    PM_ENCODING_EUC_TW,
+    PM_ENCODING_GB12345,
+    PM_ENCODING_GB18030,
+    PM_ENCODING_GB1988,
+    PM_ENCODING_GB2312,
+    PM_ENCODING_GBK,
+    PM_ENCODING_IBM437,
+    PM_ENCODING_IBM720,
+    PM_ENCODING_IBM737,
+    PM_ENCODING_IBM775,
+    PM_ENCODING_IBM852,
+    PM_ENCODING_IBM855,
+    PM_ENCODING_IBM857,
+    PM_ENCODING_IBM860,
+    PM_ENCODING_IBM861,
+    PM_ENCODING_IBM862,
+    PM_ENCODING_IBM863,
+    PM_ENCODING_IBM864,
+    PM_ENCODING_IBM865,
+    PM_ENCODING_IBM866,
+    PM_ENCODING_IBM869,
+    PM_ENCODING_ISO_8859_1,
+    PM_ENCODING_ISO_8859_2,
+    PM_ENCODING_ISO_8859_3,
+    PM_ENCODING_ISO_8859_4,
+    PM_ENCODING_ISO_8859_5,
+    PM_ENCODING_ISO_8859_6,
+    PM_ENCODING_ISO_8859_7,
+    PM_ENCODING_ISO_8859_8,
+    PM_ENCODING_ISO_8859_9,
+    PM_ENCODING_ISO_8859_10,
+    PM_ENCODING_ISO_8859_11,
+    PM_ENCODING_ISO_8859_13,
+    PM_ENCODING_ISO_8859_14,
+    PM_ENCODING_ISO_8859_15,
+    PM_ENCODING_ISO_8859_16,
+    PM_ENCODING_KOI8_R,
+    PM_ENCODING_KOI8_U,
+    PM_ENCODING_MAC_CENT_EURO,
+    PM_ENCODING_MAC_CROATIAN,
+    PM_ENCODING_MAC_CYRILLIC,
+    PM_ENCODING_MAC_GREEK,
+    PM_ENCODING_MAC_ICELAND,
+    PM_ENCODING_MAC_JAPANESE,
+    PM_ENCODING_MAC_ROMAN,
+    PM_ENCODING_MAC_ROMANIA,
+    PM_ENCODING_MAC_THAI,
+    PM_ENCODING_MAC_TURKISH,
+    PM_ENCODING_MAC_UKRAINE,
+    PM_ENCODING_SHIFT_JIS,
+    PM_ENCODING_SJIS_DOCOMO,
+    PM_ENCODING_SJIS_KDDI,
+    PM_ENCODING_SJIS_SOFTBANK,
+    PM_ENCODING_STATELESS_ISO_2022_JP,
+    PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
+    PM_ENCODING_TIS_620,
+    PM_ENCODING_US_ASCII,
+    PM_ENCODING_UTF8_MAC,
+    PM_ENCODING_UTF8_DOCOMO,
+    PM_ENCODING_UTF8_KDDI,
+    PM_ENCODING_UTF8_SOFTBANK,
+    PM_ENCODING_WINDOWS_1250,
+    PM_ENCODING_WINDOWS_1251,
+    PM_ENCODING_WINDOWS_1252,
+    PM_ENCODING_WINDOWS_1253,
+    PM_ENCODING_WINDOWS_1254,
+    PM_ENCODING_WINDOWS_1255,
+    PM_ENCODING_WINDOWS_1256,
+    PM_ENCODING_WINDOWS_1257,
+    PM_ENCODING_WINDOWS_1258,
+    PM_ENCODING_WINDOWS_31J,
+    PM_ENCODING_WINDOWS_874,
+    PM_ENCODING_MAXIMUM
+} pm_encoding_type_t;
+/**
+ * This is the table of all of the encodings that prism supports. It has
+ * PM_ENCODING_MAXIMUM entries and is indexed by pm_encoding_type_t values (see
+ * the PM_ENCODING_*_ENTRY macros). This is only a declaration; the table is
+ * defined elsewhere.
+ */
+extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
+/**
+ * A pointer to the default UTF-8 encoding's entry in the pm_encodings table.
+ * We need a reference to it to quickly create parsers.
+ */
+#define PM_ENCODING_UTF_8_ENTRY (pm_encodings + PM_ENCODING_UTF_8)
+/**
+ * A pointer to the US-ASCII encoding's entry in the pm_encodings table. We
+ * need a reference to it to be able to compare against it when a string is
+ * being created because it could possibly need to fall back to ASCII-8BIT.
+ */
+#define PM_ENCODING_US_ASCII_ENTRY (pm_encodings + PM_ENCODING_US_ASCII)
+/**
+ * A pointer to the ASCII-8BIT encoding's entry in the pm_encodings table. We
+ * need a reference to it so that pm_strpbrk can compare against it because
+ * invalid multibyte characters are not a thing in this encoding.
+ */
+#define PM_ENCODING_ASCII_8BIT_ENTRY (pm_encodings + PM_ENCODING_ASCII_8BIT)
+/**
+ * Parse the given name of an encoding and return a pointer to the corresponding
+ * encoding struct if one can be found, otherwise return NULL.
+ *
+ * The name is given as a byte range, not as a NUL-terminated string.
+ *
+ * @param start A pointer to the first byte of the name.
+ * @param end A pointer to the last byte of the name. NOTE(review): confirm
+ *     against the implementation whether this is inclusive or one past the
+ *     last byte.
+ * @returns A pointer to the encoding struct if one is found, otherwise NULL.
+ */
+const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
+#endif