RubyGems - prism - Versions diffs - 0.16.0 → 0.17.0 - Mend

prism 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +16 -1
data/Makefile +6 -0
data/README.md +1 -1
data/config.yml +50 -35
data/docs/fuzzing.md +1 -1
data/docs/serialization.md +28 -29
data/ext/prism/api_node.c +802 -770
data/ext/prism/api_pack.c +20 -9
data/ext/prism/extension.c +464 -162
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +3173 -763
data/include/prism/defines.h +32 -9
data/include/prism/diagnostic.h +36 -3
data/include/prism/enc/pm_encoding.h +118 -28
data/include/prism/node.h +38 -13
data/include/prism/options.h +204 -0
data/include/prism/pack.h +44 -33
data/include/prism/parser.h +445 -200
data/include/prism/prettyprint.h +12 -1
data/include/prism/regexp.h +16 -2
data/include/prism/util/pm_buffer.h +94 -16
data/include/prism/util/pm_char.h +162 -48
data/include/prism/util/pm_constant_pool.h +126 -32
data/include/prism/util/pm_list.h +68 -38
data/include/prism/util/pm_memchr.h +18 -3
data/include/prism/util/pm_newline_list.h +70 -27
data/include/prism/util/pm_state_stack.h +25 -7
data/include/prism/util/pm_string.h +115 -27
data/include/prism/util/pm_string_list.h +25 -6
data/include/prism/util/pm_strncasecmp.h +32 -0
data/include/prism/util/pm_strpbrk.h +31 -17
data/include/prism/version.h +27 -2
data/include/prism.h +224 -31
data/lib/prism/compiler.rb +6 -3
data/lib/prism/debug.rb +23 -7
data/lib/prism/dispatcher.rb +33 -18
data/lib/prism/dsl.rb +10 -5
data/lib/prism/ffi.rb +132 -80
data/lib/prism/lex_compat.rb +25 -15
data/lib/prism/mutation_compiler.rb +10 -5
data/lib/prism/node.rb +370 -135
data/lib/prism/node_ext.rb +1 -1
data/lib/prism/node_inspector.rb +1 -1
data/lib/prism/pack.rb +79 -40
data/lib/prism/parse_result/comments.rb +7 -2
data/lib/prism/parse_result/newlines.rb +4 -0
data/lib/prism/parse_result.rb +150 -30
data/lib/prism/pattern.rb +11 -0
data/lib/prism/ripper_compat.rb +28 -10
data/lib/prism/serialize.rb +86 -54
data/lib/prism/visitor.rb +10 -3
data/lib/prism.rb +20 -2
data/prism.gemspec +4 -2
data/rbi/prism.rbi +104 -60
data/rbi/prism_static.rbi +16 -2
data/sig/prism.rbs +72 -43
data/sig/prism_static.rbs +14 -1
data/src/diagnostic.c +56 -53
data/src/enc/pm_big5.c +1 -0
data/src/enc/pm_euc_jp.c +1 -0
data/src/enc/pm_gbk.c +1 -0
data/src/enc/pm_shift_jis.c +1 -0
data/src/enc/pm_tables.c +316 -80
data/src/enc/pm_unicode.c +53 -8
data/src/enc/pm_windows_31j.c +1 -0
data/src/node.c +334 -321
data/src/options.c +170 -0
data/src/prettyprint.c +74 -47
data/src/prism.c +1642 -856
data/src/regexp.c +151 -95
data/src/serialize.c +44 -20
data/src/token_type.c +3 -1
data/src/util/pm_buffer.c +45 -15
data/src/util/pm_char.c +103 -57
data/src/util/pm_constant_pool.c +51 -21
data/src/util/pm_list.c +12 -4
data/src/util/pm_memchr.c +5 -3
data/src/util/pm_newline_list.c +20 -12
data/src/util/pm_state_stack.c +9 -3
data/src/util/pm_string.c +95 -85
data/src/util/pm_string_list.c +14 -15
data/src/util/pm_strncasecmp.c +10 -3
data/src/util/pm_strpbrk.c +25 -19
metadata +5 -3
data/docs/prism.png +0 -0

data/src/enc/pm_unicode.c CHANGED Viewed

@@ -1,15 +1,14 @@
-// Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
-// decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
 #include "prism/enc/pm_encoding.h"
 typedef uint32_t pm_unicode_codepoint_t;
-// Each element of the following table contains a bitfield that indicates a
-// piece of information about the corresponding unicode codepoint. Note that
-// this table is different from other encodings where we used a lookup table
-// because the indices of those tables are the byte representations, not the
-// codepoints themselves.
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding unicode codepoint. Note that
+ * this table is different from other encodings where we used a lookup table
+ * because the indices of those tables are the byte representations, not the
+ * codepoints themselves.
+ */
 const uint8_t pm_encoding_unicode_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -2179,6 +2178,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
     0x1F170, 0x1F189,
 };
+/**
+ * Binary search through the given list of codepoints to see if the given
+ * codepoint is in the list.
+ */
 static bool
 pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
     size_t start = 0;
@@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co
     return false;
 }
+/**
+ * A state transition table for decoding UTF-8.
+ *
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
 static const uint8_t pm_utf_8_dfa[] = {
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
@@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = {
     1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
 };
+/**
+ * Given a pointer to a string and the number of bytes remaining in the string,
+ * decode the next UTF-8 codepoint and return it. The number of bytes consumed
+ * is returned in the width out parameter.
+ */
 static pm_unicode_codepoint_t
 pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
     assert(n >= 1);
@@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
     return width;
 }
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphabetical character.
+ */
 size_t
 pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
     if (*b < 0x80) {
@@ -2269,6 +2304,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
     }
 }
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphanumeric character.
+ */
 size_t
 pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
     if (*b < 0x80) {
@@ -2285,6 +2324,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
     }
 }
+/**
+ * Return true if the next character in the UTF-8 encoding is an uppercase
+ * character.
+ */
 bool
 pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
     if (*b < 0x80) {
@@ -2305,6 +2348,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
 #undef UNICODE_ALNUM_CODEPOINTS_LENGTH
 #undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
+/** UTF-8 */
 pm_encoding_t pm_encoding_utf_8 = {
     .name = "utf-8",
     .char_width = pm_encoding_utf_8_char_width,
@@ -2314,6 +2358,7 @@ pm_encoding_t pm_encoding_utf_8 = {
     .multibyte = true
 };
+/** UTF8-mac */
 pm_encoding_t pm_encoding_utf8_mac = {
     .name = "utf8-mac",
     .char_width = pm_encoding_utf_8_char_width,

data/src/enc/pm_windows_31j.c CHANGED Viewed

@@ -46,6 +46,7 @@ pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
     }
 }
+/** Windows-31J */
 pm_encoding_t pm_encoding_windows_31j = {
     .name = "windows-31j",
     .char_width = pm_encoding_windows_31j_char_width,