RubyGems - jruby-prism-parser - Versions diffs - 0.23.0.pre.SNAPSHOT-java - Mend

jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +401 -0
data/CODE_OF_CONDUCT.md +76 -0
data/CONTRIBUTING.md +62 -0
data/LICENSE.md +7 -0
data/Makefile +101 -0
data/README.md +98 -0
data/config.yml +2902 -0
data/docs/build_system.md +91 -0
data/docs/configuration.md +64 -0
data/docs/cruby_compilation.md +27 -0
data/docs/design.md +53 -0
data/docs/encoding.md +121 -0
data/docs/fuzzing.md +88 -0
data/docs/heredocs.md +36 -0
data/docs/javascript.md +118 -0
data/docs/local_variable_depth.md +229 -0
data/docs/mapping.md +117 -0
data/docs/parser_translation.md +34 -0
data/docs/parsing_rules.md +19 -0
data/docs/releasing.md +98 -0
data/docs/ripper.md +36 -0
data/docs/ruby_api.md +43 -0
data/docs/ruby_parser_translation.md +19 -0
data/docs/serialization.md +209 -0
data/docs/testing.md +55 -0
data/ext/prism/api_node.c +5098 -0
data/ext/prism/api_pack.c +267 -0
data/ext/prism/extconf.rb +110 -0
data/ext/prism/extension.c +1155 -0
data/ext/prism/extension.h +18 -0
data/include/prism/ast.h +5807 -0
data/include/prism/defines.h +102 -0
data/include/prism/diagnostic.h +339 -0
data/include/prism/encoding.h +265 -0
data/include/prism/node.h +57 -0
data/include/prism/options.h +230 -0
data/include/prism/pack.h +152 -0
data/include/prism/parser.h +732 -0
data/include/prism/prettyprint.h +26 -0
data/include/prism/regexp.h +33 -0
data/include/prism/util/pm_buffer.h +155 -0
data/include/prism/util/pm_char.h +205 -0
data/include/prism/util/pm_constant_pool.h +209 -0
data/include/prism/util/pm_list.h +97 -0
data/include/prism/util/pm_memchr.h +29 -0
data/include/prism/util/pm_newline_list.h +93 -0
data/include/prism/util/pm_state_stack.h +42 -0
data/include/prism/util/pm_string.h +150 -0
data/include/prism/util/pm_string_list.h +44 -0
data/include/prism/util/pm_strncasecmp.h +32 -0
data/include/prism/util/pm_strpbrk.h +46 -0
data/include/prism/version.h +29 -0
data/include/prism.h +289 -0
data/jruby-prism.jar +0 -0
data/lib/prism/compiler.rb +486 -0
data/lib/prism/debug.rb +206 -0
data/lib/prism/desugar_compiler.rb +207 -0
data/lib/prism/dispatcher.rb +2150 -0
data/lib/prism/dot_visitor.rb +4634 -0
data/lib/prism/dsl.rb +785 -0
data/lib/prism/ffi.rb +346 -0
data/lib/prism/lex_compat.rb +908 -0
data/lib/prism/mutation_compiler.rb +753 -0
data/lib/prism/node.rb +17864 -0
data/lib/prism/node_ext.rb +212 -0
data/lib/prism/node_inspector.rb +68 -0
data/lib/prism/pack.rb +224 -0
data/lib/prism/parse_result/comments.rb +177 -0
data/lib/prism/parse_result/newlines.rb +64 -0
data/lib/prism/parse_result.rb +498 -0
data/lib/prism/pattern.rb +250 -0
data/lib/prism/serialize.rb +1354 -0
data/lib/prism/translation/parser/compiler.rb +1838 -0
data/lib/prism/translation/parser/lexer.rb +335 -0
data/lib/prism/translation/parser/rubocop.rb +37 -0
data/lib/prism/translation/parser.rb +178 -0
data/lib/prism/translation/ripper.rb +577 -0
data/lib/prism/translation/ruby_parser.rb +1521 -0
data/lib/prism/translation.rb +11 -0
data/lib/prism/version.rb +3 -0
data/lib/prism/visitor.rb +495 -0
data/lib/prism.rb +99 -0
data/prism.gemspec +135 -0
data/rbi/prism.rbi +7767 -0
data/rbi/prism_static.rbi +207 -0
data/sig/prism.rbs +4773 -0
data/sig/prism_static.rbs +201 -0
data/src/diagnostic.c +400 -0
data/src/encoding.c +5132 -0
data/src/node.c +2786 -0
data/src/options.c +213 -0
data/src/pack.c +493 -0
data/src/prettyprint.c +8881 -0
data/src/prism.c +18406 -0
data/src/regexp.c +638 -0
data/src/serialize.c +1554 -0
data/src/token_type.c +700 -0
data/src/util/pm_buffer.c +190 -0
data/src/util/pm_char.c +318 -0
data/src/util/pm_constant_pool.c +322 -0
data/src/util/pm_list.c +49 -0
data/src/util/pm_memchr.c +35 -0
data/src/util/pm_newline_list.c +84 -0
data/src/util/pm_state_stack.c +25 -0
data/src/util/pm_string.c +203 -0
data/src/util/pm_string_list.c +28 -0
data/src/util/pm_strncasecmp.c +24 -0
data/src/util/pm_strpbrk.c +180 -0
metadata +156 -0

data/src/util/pm_string.c ADDED Viewed

@@ -0,0 +1,203 @@
+#include "prism/util/pm_string.h"
+/**
+ * Report how many bytes a pm_string_t occupies. Callers that cannot see the
+ * struct definition (such as the FFI backend) use this to allocate storage of
+ * the right size.
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_sizeof(void) {
+    const size_t struct_size = sizeof(pm_string_t);
+    return struct_size;
+}
+/**
+ * Set up `string` as a shared view over the bytes in [start, end). Only the
+ * start pointer and the byte count are recorded; nothing is copied. `start`
+ * must not be greater than `end`.
+ */
+void
+pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
+    assert(start <= end);
+    const size_t span = (size_t) (end - start);
+    *string = (pm_string_t) { .type = PM_STRING_SHARED, .source = start, .length = span };
+}
+/**
+ * Set up `string` as the owner of the `length` bytes at `source`. Owned strings
+ * have their source released with `free` by `pm_string_free`.
+ */
+void
+pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
+    const pm_string_t owned = { .type = PM_STRING_OWNED, .source = source, .length = length };
+    *string = owned;
+}
+/**
+ * Set up `string` as a view over `length` bytes of constant data at `source`.
+ * The string does not take ownership of that memory.
+ */
+void
+pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
+    const uint8_t *bytes = (const uint8_t *) source;
+    *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = bytes, .length = length };
+}
+/**
+ * Map the file indicated by the filepath parameter into memory and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on windows we use
+ * `MapViewOfFile`, and everywhere else we use `mmap`.
+ *
+ * An empty file is never mapped. Instead the string is initialized as a
+ * constant, zero-length string.
+ *
+ * Returns true on success. Returns false if the file could not be opened,
+ * sized, or mapped; in that case `string` is not written to.
+ */
+bool
+pm_string_mapped_init(pm_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (file == INVALID_HANDLE_VALUE) {
+        return false;
+    }
+    // Get the file size.
+    DWORD file_size = GetFileSize(file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        CloseHandle(file);
+        return false;
+    }
+    // If the file is empty there is nothing to map. Point the string at a
+    // string literal, which has static storage duration. A block-local array
+    // must not be used here because its lifetime ends when this function
+    // returns, which would leave the string holding a dangling pointer.
+    if (file_size == 0) {
+        CloseHandle(file);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = (const uint8_t *) "", .length = 0 };
+        return true;
+    }
+    // Create a mapping of the file.
+    HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (mapping == NULL) {
+        CloseHandle(file);
+        return false;
+    }
+    // Map the file into memory. The handles are closed right after mapping,
+    // whether or not the view was created.
+    uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    CloseHandle(mapping);
+    CloseHandle(file);
+    if (source == NULL) {
+        return false;
+    }
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
+    return true;
+#else
+    // Open the file for reading
+    int fd = open(filepath, O_RDONLY);
+    if (fd == -1) {
+        return false;
+    }
+    // Stat the file to get the file size
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        return false;
+    }
+    size_t size = (size_t) sb.st_size;
+    // If the file is empty there is nothing to map. Point the string at a
+    // string literal, which has static storage duration. A block-local array
+    // must not be used here because its lifetime ends when this function
+    // returns, which would leave the string holding a dangling pointer.
+    if (size == 0) {
+        close(fd);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = (const uint8_t *) "", .length = 0 };
+        return true;
+    }
+    // mmap the file descriptor to virtually get the contents
+    uint8_t *source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (source == MAP_FAILED) {
+        // Do not leak the descriptor when the mapping fails.
+        close(fd);
+        return false;
+    }
+    // The mapping stays valid after the descriptor is closed.
+    close(fd);
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
+    return true;
+#endif
+}
+/**
+ * Compute the memory attributed to the string: the struct itself, plus the
+ * source bytes when the string owns them.
+ */
+size_t
+pm_string_memsize(const pm_string_t *string) {
+    const size_t owned_bytes = (string->type == PM_STRING_OWNED) ? string->length : 0;
+    return sizeof(pm_string_t) + owned_bytes;
+}
+/**
+ * Make sure the string owns its bytes. A string that is already owned is left
+ * alone; any other kind has its source copied into freshly allocated memory and
+ * is reinitialized as owned. If the allocation fails the string is left
+ * unchanged.
+ */
+void
+pm_string_ensure_owned(pm_string_t *string) {
+    if (string->type != PM_STRING_OWNED) {
+        const size_t byte_count = pm_string_length(string);
+        const uint8_t *borrowed = pm_string_source(string);
+        uint8_t *copy = malloc(byte_count);
+        if (copy != NULL) {
+            // Copy first, then hand the buffer over to the string.
+            memcpy(copy, borrowed, byte_count);
+            pm_string_owned_init(string, copy, byte_count);
+        }
+    }
+}
+/**
+ * Accessor for the number of bytes recorded in the string.
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_length(const pm_string_t *string) {
+    const size_t byte_count = string->length;
+    return byte_count;
+}
+/**
+ * Accessor for the pointer to the first byte of the string's source.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t *
+pm_string_source(const pm_string_t *string) {
+    const uint8_t *first_byte = string->source;
+    return first_byte;
+}
+/**
+ * Release whatever memory backs the given string. Owned strings are released
+ * with `free`; mapped strings with a non-zero length are unmapped. Every other
+ * kind of string is left alone. The `pm_string_t` struct itself is not freed.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_string_free(pm_string_t *string) {
+    void *backing = (void *) string->source;
+    if (string->type == PM_STRING_OWNED) {
+        free(backing);
+        return;
+    }
+    if (string->type != PM_STRING_MAPPED || string->length == 0) {
+        return;
+    }
+#if defined(_WIN32)
+    UnmapViewOfFile(backing);
+#else
+    munmap(backing, string->length);
+#endif
+}

data/src/util/pm_string_list.c ADDED Viewed

@@ -0,0 +1,28 @@
+#include "prism/util/pm_string_list.h"
+/**
+ * Push a copy of the given pm_string_t onto the end of the string list. When
+ * the list is full its capacity is grown first: to 1 if it was 0, otherwise to
+ * double its current value. The process is aborted if that reallocation fails.
+ */
+void
+pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string) {
+    if (string_list->capacity < string_list->length + 1) {
+        string_list->capacity = (string_list->capacity == 0) ? 1 : string_list->capacity * 2;
+        string_list->strings = realloc(string_list->strings, string_list->capacity * sizeof(pm_string_t));
+        if (string_list->strings == NULL) {
+            abort();
+        }
+    }
+    string_list->strings[string_list->length] = *string;
+    string_list->length++;
+}
+/**
+ * Release the array that holds the list's entries. The entries themselves and
+ * the `pm_string_list_t` struct are not freed here.
+ */
+void
+pm_string_list_free(pm_string_list_t *string_list) {
+    pm_string_t *entries = string_list->strings;
+    free(entries);
+}

data/src/util/pm_strncasecmp.c ADDED Viewed

@@ -0,0 +1,24 @@
+#include "prism/util/pm_strncasecmp.h"
+/**
+ * Compare two strings, ignoring case, up to the given length. Returns 0 if the
+ * strings are equal, a negative number if string1 is less than string2, or a
+ * positive number if string1 is greater than string2.
+ *
+ * Comparison stops at the first differing byte, at the first NUL terminator in
+ * either string, or after `length` bytes, whichever comes first. A string that
+ * ends early compares as less than a longer string with the same prefix.
+ *
+ * Note that this is effectively our own implementation of strncasecmp, but it's
+ * not available on all of the platforms we want to support so we're rolling it
+ * here.
+ */
+int
+pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
+    for (size_t offset = 0; offset < length; offset++) {
+        const uint8_t byte1 = string1[offset];
+        const uint8_t byte2 = string2[offset];
+        // string1 has ended. The result is 0 if string2 ended here as well,
+        // and negative otherwise because string1 is a proper prefix of string2.
+        if (byte1 == '\0') return 0 - tolower(byte2);
+        // string2 ended first, so string1 is the greater string.
+        if (byte2 == '\0') return byte1;
+        const int difference = tolower(byte1) - tolower(byte2);
+        if (difference != 0) return difference;
+    }
+    return 0;
+}
+}

data/src/util/pm_strpbrk.c ADDED Viewed

@@ -0,0 +1,180 @@
+#include "prism/util/pm_strpbrk.h"
+/**
+ * Record an invalid multibyte character error on the parser's error list,
+ * covering the bytes from `start` to `end`. The first byte of the range is
+ * passed along as the argument for the diagnostic's message.
+ */
+static inline void
+pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    const uint8_t first_byte = *start;
+    pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, first_byte);
+}
+/**
+ * This is the default path, which decodes the source as UTF-8. It returns a
+ * pointer to the first byte within `maximum` bytes of `source` that appears in
+ * `charset`, or NULL if there is none.
+ *
+ * Because `strchr` treats the terminating NUL of `charset` as part of the
+ * string, a NUL byte in `source` is reported as a match as well.
+ *
+ * When `validate` is true, each run of bytes that does not decode as a
+ * character is reported to the parser as a single error.
+ */
+static inline const uint8_t *
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t offset = 0;
+    while (offset < maximum) {
+        const uint8_t byte = source[offset];
+        if (strchr((const char *) charset, byte) != NULL) {
+            return source + offset;
+        }
+        // Bytes below 0x80 are stepped over one at a time.
+        if (byte < 0x80) {
+            offset++;
+            continue;
+        }
+        const size_t width = pm_encoding_utf_8_char_width(source + offset, (ptrdiff_t) (maximum - offset));
+        if (width > 0) {
+            offset += width;
+            continue;
+        }
+        if (!validate) {
+            offset++;
+            continue;
+        }
+        // We have found an invalid multibyte character. Keep advancing until
+        // the next byte that starts a valid character, so that the whole run
+        // is reported as one error instead of one error per byte.
+        const size_t invalid_start = offset;
+        do {
+            offset++;
+        } while (offset < maximum && pm_encoding_utf_8_char_width(source + offset, (ptrdiff_t) (maximum - offset)) == 0);
+        pm_strpbrk_invalid_multibyte_character(parser, source + invalid_start, source + offset);
+    }
+    return NULL;
+}
+/**
+ * This is the path when the encoding is ASCII-8BIT. Every byte is examined on
+ * its own, and no validation is performed. It returns a pointer to the first
+ * byte within `maximum` bytes of `source` that appears in `charset`, or NULL if
+ * there is none.
+ *
+ * Because `strchr` treats the terminating NUL of `charset` as part of the
+ * string, a NUL byte in `source` is reported as a match as well.
+ */
+static inline const uint8_t *
+pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
+    const char *wanted = (const char *) charset;
+    for (size_t offset = 0; offset < maximum; offset++) {
+        if (strchr(wanted, source[offset]) != NULL) {
+            return source + offset;
+        }
+    }
+    return NULL;
+}
+/**
+ * This is the slow path that does care about the encoding. Bytes of 0x80 and
+ * above are stepped over one whole character at a time, using the parser's
+ * encoding to determine each character's width. It returns a pointer to the
+ * first byte examined that appears in `charset`, or NULL if none is found
+ * within `maximum` bytes.
+ *
+ * Because `strchr` treats the terminating NUL of `charset` as part of the
+ * string, a NUL byte in `source` is reported as a match as well.
+ *
+ * When `validate` is true, each run of bytes that does not decode as a
+ * character is reported to the parser as a single error.
+ */
+static inline const uint8_t *
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t offset = 0;
+    while (offset < maximum) {
+        const uint8_t byte = source[offset];
+        if (strchr((const char *) charset, byte) != NULL) {
+            return source + offset;
+        }
+        // Bytes below 0x80 are stepped over one at a time.
+        if (byte < 0x80) {
+            offset++;
+            continue;
+        }
+        const size_t width = parser->encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset));
+        if (width > 0) {
+            offset += width;
+            continue;
+        }
+        if (!validate) {
+            offset++;
+            continue;
+        }
+        // We have found an invalid multibyte character. Keep advancing until
+        // the next byte that starts a valid character, so that the whole run
+        // is reported as one error instead of one error per byte.
+        const size_t invalid_start = offset;
+        do {
+            offset++;
+        } while (offset < maximum && parser->encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset)) == 0);
+        pm_strpbrk_invalid_multibyte_character(parser, source + invalid_start, source + offset);
+    }
+    return NULL;
+}
+/**
+ * This is the fast path used when the encoding only has single-byte
+ * characters. It returns a pointer to the first byte examined that appears in
+ * `charset`, or NULL if none is found within `maximum` bytes.
+ *
+ * Because `strchr` treats the terminating NUL of `charset` as part of the
+ * string, a NUL byte in `source` is reported as a match as well.
+ *
+ * When `validate` is false every byte is stepped over without consulting the
+ * encoding. When it is true, bytes of 0x80 and above are checked against the
+ * parser's encoding, and each run of bytes the encoding rejects is reported to
+ * the parser as a single error.
+ */
+static inline const uint8_t *
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t offset = 0;
+    while (offset < maximum) {
+        const uint8_t byte = source[offset];
+        if (strchr((const char *) charset, byte) != NULL) {
+            return source + offset;
+        }
+        // Nothing to check for bytes below 0x80, or when not validating.
+        if (!validate || byte < 0x80) {
+            offset++;
+            continue;
+        }
+        const size_t width = parser->encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset));
+        if (width > 0) {
+            offset += width;
+            continue;
+        }
+        // We have found an invalid character. Keep advancing until the next
+        // byte that starts a valid character, so that the whole run is
+        // reported as one error instead of one error per byte.
+        const size_t invalid_start = offset;
+        do {
+            offset++;
+        } while (offset < maximum && parser->encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset)) == 0);
+        pm_strpbrk_invalid_multibyte_character(parser, source + invalid_start, source + offset);
+    }
+    return NULL;
+}
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated, and
+ * pm_parse does not require its input to be null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap). For that reason this function takes an explicit `length` and never
+ * reads past it. A `length` of zero or less yields NULL.
+ *
+ * Ruby allows null bytes within strings, comments, regular expressions, etc.,
+ * so a null byte does not end the search range. Note, however, that each path
+ * below tests bytes with `strchr`, which treats the terminating NUL of
+ * `charset` as part of the string, so a null byte in `source` is itself
+ * returned as a match. Callers presumably skip past such matches themselves
+ * (TODO: confirm against the call sites).
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ *
+ * The path is chosen as follows: the UTF-8 path when the parser's encoding has
+ * not been changed, the ASCII-8BIT path for that encoding, the multi-byte path
+ * for encodings flagged as multibyte, and the single-byte path otherwise.
+ */
+const uint8_t *
+pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
+    if (length <= 0) {
+        return NULL;
+    }
+    const size_t maximum = (size_t) length;
+    if (!parser->encoding_changed) {
+        return pm_strpbrk_utf8(parser, source, charset, maximum, validate);
+    }
+    if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+        return pm_strpbrk_ascii_8bit(source, charset, maximum);
+    }
+    if (parser->encoding->multibyte) {
+        return pm_strpbrk_multi_byte(parser, source, charset, maximum, validate);
+    }
+    return pm_strpbrk_single_byte(parser, source, charset, maximum, validate);
+}

metadata ADDED Viewed

@@ -0,0 +1,156 @@
+--- !ruby/object:Gem::Specification
+name: jruby-prism-parser
+version: !ruby/object:Gem::Version
+  version: 0.23.0.pre.SNAPSHOT
+platform: java
+authors:
+- JRuby Team
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2024-02-16 00:00:00.000000000 Z
+dependencies: []
+description:
+email:
+- admin@jruby.org
+executables: []
+extensions:
+- ext/prism/extconf.rb
+extra_rdoc_files: []
+files:
+- CHANGELOG.md
+- CODE_OF_CONDUCT.md
+- CONTRIBUTING.md
+- LICENSE.md
+- Makefile
+- README.md
+- config.yml
+- docs/build_system.md
+- docs/configuration.md
+- docs/cruby_compilation.md
+- docs/design.md
+- docs/encoding.md
+- docs/fuzzing.md
+- docs/heredocs.md
+- docs/javascript.md
+- docs/local_variable_depth.md
+- docs/mapping.md
+- docs/parser_translation.md
+- docs/parsing_rules.md
+- docs/releasing.md
+- docs/ripper.md
+- docs/ruby_api.md
+- docs/ruby_parser_translation.md
+- docs/serialization.md
+- docs/testing.md
+- ext/prism/api_node.c
+- ext/prism/api_pack.c
+- ext/prism/extconf.rb
+- ext/prism/extension.c
+- ext/prism/extension.h
+- include/prism.h
+- include/prism/ast.h
+- include/prism/defines.h
+- include/prism/diagnostic.h
+- include/prism/encoding.h
+- include/prism/node.h
+- include/prism/options.h
+- include/prism/pack.h
+- include/prism/parser.h
+- include/prism/prettyprint.h
+- include/prism/regexp.h
+- include/prism/util/pm_buffer.h
+- include/prism/util/pm_char.h
+- include/prism/util/pm_constant_pool.h
+- include/prism/util/pm_list.h
+- include/prism/util/pm_memchr.h
+- include/prism/util/pm_newline_list.h
+- include/prism/util/pm_state_stack.h
+- include/prism/util/pm_string.h
+- include/prism/util/pm_string_list.h
+- include/prism/util/pm_strncasecmp.h
+- include/prism/util/pm_strpbrk.h
+- include/prism/version.h
+- jruby-prism.jar
+- lib/prism.rb
+- lib/prism/compiler.rb
+- lib/prism/debug.rb
+- lib/prism/desugar_compiler.rb
+- lib/prism/dispatcher.rb
+- lib/prism/dot_visitor.rb
+- lib/prism/dsl.rb
+- lib/prism/ffi.rb
+- lib/prism/lex_compat.rb
+- lib/prism/mutation_compiler.rb
+- lib/prism/node.rb
+- lib/prism/node_ext.rb
+- lib/prism/node_inspector.rb
+- lib/prism/pack.rb
+- lib/prism/parse_result.rb
+- lib/prism/parse_result/comments.rb
+- lib/prism/parse_result/newlines.rb
+- lib/prism/pattern.rb
+- lib/prism/serialize.rb
+- lib/prism/translation.rb
+- lib/prism/translation/parser.rb
+- lib/prism/translation/parser/compiler.rb
+- lib/prism/translation/parser/lexer.rb
+- lib/prism/translation/parser/rubocop.rb
+- lib/prism/translation/ripper.rb
+- lib/prism/translation/ruby_parser.rb
+- lib/prism/version.rb
+- lib/prism/visitor.rb
+- prism.gemspec
+- rbi/prism.rbi
+- rbi/prism_static.rbi
+- sig/prism.rbs
+- sig/prism_static.rbs
+- src/diagnostic.c
+- src/encoding.c
+- src/node.c
+- src/options.c
+- src/pack.c
+- src/prettyprint.c
+- src/prism.c
+- src/regexp.c
+- src/serialize.c
+- src/token_type.c
+- src/util/pm_buffer.c
+- src/util/pm_char.c
+- src/util/pm_constant_pool.c
+- src/util/pm_list.c
+- src/util/pm_memchr.c
+- src/util/pm_newline_list.c
+- src/util/pm_state_stack.c
+- src/util/pm_string.c
+- src/util/pm_string_list.c
+- src/util/pm_strncasecmp.c
+- src/util/pm_strpbrk.c
+homepage: https://github.com/jruby/jruby-prism
+licenses:
+- MIT
+metadata:
+  allowed_push_host: https://rubygems.org
+  source_code_uri: https://github.com/ruby/prism
+  changelog_uri: https://github.com/ruby/prism/blob/main/CHANGELOG.md
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 2.7.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">"
+    - !ruby/object:Gem::Version
+      version: 1.3.1
+requirements:
+- jar org.jruby, jruby-prism, 0.23.0-SNAPSHOT, :scope => :runtime
+rubygems_version: 3.3.26
+signing_key:
+specification_version: 4
+summary: Prism JRuby Parser Support
+test_files: []