prism 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/enc/pm_unicode.c CHANGED
@@ -1,15 +1,14 @@
1
- // Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
2
- // decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
3
-
4
1
  #include "prism/enc/pm_encoding.h"
5
2
 
6
3
  typedef uint32_t pm_unicode_codepoint_t;
7
4
 
8
- // Each element of the following table contains a bitfield that indicates a
9
- // piece of information about the corresponding unicode codepoint. Note that
10
- // this table is different from other encodings where we used a lookup table
11
- // because the indices of those tables are the byte representations, not the
12
- // codepoints themselves.
5
+ /**
6
+ * Each element of the following table contains a bitfield that indicates a
7
+ * piece of information about the corresponding unicode codepoint. Note that
8
+ * this table is different from other encodings where we used a lookup table
9
+ * because the indices of those tables are the byte representations, not the
10
+ * codepoints themselves.
11
+ */
13
12
  const uint8_t pm_encoding_unicode_table[256] = {
14
13
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
15
14
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -2179,6 +2178,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
2179
2178
  0x1F170, 0x1F189,
2180
2179
  };
2181
2180
 
2181
+ /**
2182
+ * Binary search through the given list of codepoints to see if the given
2183
+ * codepoint is in the list.
2184
+ */
2182
2185
  static bool
2183
2186
  pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
2184
2187
  size_t start = 0;
@@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co
2202
2205
  return false;
2203
2206
  }
2204
2207
 
2208
+ /**
2209
+ * A state transition table for decoding UTF-8.
2210
+ *
2211
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
2212
+ *
2213
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
2214
+ * of this software and associated documentation files (the "Software"), to deal
2215
+ * in the Software without restriction, including without limitation the rights
2216
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
2217
+ * copies of the Software, and to permit persons to whom the Software is
2218
+ * furnished to do so, subject to the following conditions:
2219
+ *
2220
+ * The above copyright notice and this permission notice shall be included in
2221
+ * all copies or substantial portions of the Software.
2222
+ *
2223
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2224
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2225
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2226
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2227
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2228
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2229
+ * SOFTWARE.
2230
+ */
2205
2231
  static const uint8_t pm_utf_8_dfa[] = {
2206
2232
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
2207
2233
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
@@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = {
2219
2245
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
2220
2246
  };
2221
2247
 
2248
+ /**
2249
+ * Given a pointer to a string and the number of bytes remaining in the string,
2250
+ * decode the next UTF-8 codepoint and return it. The number of bytes consumed
2251
+ * is returned in the width out parameter.
2252
+ */
2222
2253
  static pm_unicode_codepoint_t
2223
2254
  pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2224
2255
  assert(n >= 1);
@@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2253
2284
  return width;
2254
2285
  }
2255
2286
 
2287
+ /**
2288
+ * Return the size of the next character in the UTF-8 encoding if it is an
2289
+ * alphabetical character.
2290
+ */
2256
2291
  size_t
2257
2292
  pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2258
2293
  if (*b < 0x80) {
@@ -2269,6 +2304,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2269
2304
  }
2270
2305
  }
2271
2306
 
2307
+ /**
2308
+ * Return the size of the next character in the UTF-8 encoding if it is an
2309
+ * alphanumeric character.
2310
+ */
2272
2311
  size_t
2273
2312
  pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2274
2313
  if (*b < 0x80) {
@@ -2285,6 +2324,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2285
2324
  }
2286
2325
  }
2287
2326
 
2327
+ /**
2328
+ * Return true if the next character in the UTF-8 encoding is an uppercase
2329
+ * character.
2330
+ */
2288
2331
  bool
2289
2332
  pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2290
2333
  if (*b < 0x80) {
@@ -2305,6 +2348,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2305
2348
  #undef UNICODE_ALNUM_CODEPOINTS_LENGTH
2306
2349
  #undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
2307
2350
 
2351
+ /** UTF-8 */
2308
2352
  pm_encoding_t pm_encoding_utf_8 = {
2309
2353
  .name = "utf-8",
2310
2354
  .char_width = pm_encoding_utf_8_char_width,
@@ -2314,6 +2358,7 @@ pm_encoding_t pm_encoding_utf_8 = {
2314
2358
  .multibyte = true
2315
2359
  };
2316
2360
 
2361
+ /** UTF8-mac */
2317
2362
  pm_encoding_t pm_encoding_utf8_mac = {
2318
2363
  .name = "utf8-mac",
2319
2364
  .char_width = pm_encoding_utf_8_char_width,
data/src/enc/pm_windows_31j.c CHANGED
@@ -46,6 +46,7 @@ pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
46
46
  }
47
47
  }
48
48
 
49
+ /** Windows-31J */
49
50
  pm_encoding_t pm_encoding_windows_31j = {
50
51
  .name = "windows-31j",
51
52
  .char_width = pm_encoding_windows_31j_char_width,