prism 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/enc/pm_unicode.c CHANGED
@@ -1,15 +1,14 @@
1
- // Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
2
- // decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
3
-
4
1
  #include "prism/enc/pm_encoding.h"
5
2
 
6
3
  typedef uint32_t pm_unicode_codepoint_t;
7
4
 
8
- // Each element of the following table contains a bitfield that indicates a
9
- // piece of information about the corresponding unicode codepoint. Note that
10
- // this table is different from other encodings where we used a lookup table
11
- // because the indices of those tables are the byte representations, not the
12
- // codepoints themselves.
5
+ /**
6
+ * Each element of the following table contains a bitfield that indicates a
7
+ * piece of information about the corresponding unicode codepoint. Note that
8
+ * this table is different from other encodings where we used a lookup table
9
+ * because the indices of those tables are the byte representations, not the
10
+ * codepoints themselves.
11
+ */
13
12
  const uint8_t pm_encoding_unicode_table[256] = {
14
13
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
15
14
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
@@ -2179,6 +2178,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
2179
2178
  0x1F170, 0x1F189,
2180
2179
  };
2181
2180
 
2181
+ /**
2182
+ * Binary search through the given list of codepoints to see if the given
2183
+ * codepoint is in the list.
2184
+ */
2182
2185
  static bool
2183
2186
  pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
2184
2187
  size_t start = 0;
@@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co
2202
2205
  return false;
2203
2206
  }
2204
2207
 
2208
+ /**
2209
+ * A state transition table for decoding UTF-8.
2210
+ *
2211
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
2212
+ *
2213
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
2214
+ * of this software and associated documentation files (the "Software"), to deal
2215
+ * in the Software without restriction, including without limitation the rights
2216
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
2217
+ * copies of the Software, and to permit persons to whom the Software is
2218
+ * furnished to do so, subject to the following conditions:
2219
+ *
2220
+ * The above copyright notice and this permission notice shall be included in
2221
+ * all copies or substantial portions of the Software.
2222
+ *
2223
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2224
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2225
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2226
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2227
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2228
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2229
+ * SOFTWARE.
2230
+ */
2205
2231
  static const uint8_t pm_utf_8_dfa[] = {
2206
2232
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
2207
2233
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
@@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = {
2219
2245
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
2220
2246
  };
2221
2247
 
2248
+ /**
2249
+ * Given a pointer to a string and the number of bytes remaining in the string,
2250
+ * decode the next UTF-8 codepoint and return it. The number of bytes consumed
2251
+ * is returned in the width out parameter.
2252
+ */
2222
2253
  static pm_unicode_codepoint_t
2223
2254
  pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2224
2255
  assert(n >= 1);
@@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2253
2284
  return width;
2254
2285
  }
2255
2286
 
2287
+ /**
2288
+ * Return the size of the next character in the UTF-8 encoding if it is an
2289
+ * alphabetical character.
2290
+ */
2256
2291
  size_t
2257
2292
  pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2258
2293
  if (*b < 0x80) {
@@ -2269,6 +2304,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2269
2304
  }
2270
2305
  }
2271
2306
 
2307
+ /**
2308
+ * Return the size of the next character in the UTF-8 encoding if it is an
2309
+ * alphanumeric character.
2310
+ */
2272
2311
  size_t
2273
2312
  pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2274
2313
  if (*b < 0x80) {
@@ -2285,6 +2324,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2285
2324
  }
2286
2325
  }
2287
2326
 
2327
+ /**
2328
+ * Return true if the next character in the UTF-8 encoding is an uppercase
2329
+ * character.
2330
+ */
2288
2331
  bool
2289
2332
  pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2290
2333
  if (*b < 0x80) {
@@ -2305,6 +2348,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2305
2348
  #undef UNICODE_ALNUM_CODEPOINTS_LENGTH
2306
2349
  #undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
2307
2350
 
2351
+ /** UTF-8 */
2308
2352
  pm_encoding_t pm_encoding_utf_8 = {
2309
2353
  .name = "utf-8",
2310
2354
  .char_width = pm_encoding_utf_8_char_width,
@@ -2314,6 +2358,7 @@ pm_encoding_t pm_encoding_utf_8 = {
2314
2358
  .multibyte = true
2315
2359
  };
2316
2360
 
2361
+ /** UTF8-mac */
2317
2362
  pm_encoding_t pm_encoding_utf8_mac = {
2318
2363
  .name = "utf8-mac",
2319
2364
  .char_width = pm_encoding_utf_8_char_width,
data/src/enc/pm_windows_31j.c CHANGED
@@ -46,6 +46,7 @@ pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
46
46
  }
47
47
  }
48
48
 
49
+ /** Windows-31J */
49
50
  pm_encoding_t pm_encoding_windows_31j = {
50
51
  .name = "windows-31j",
51
52
  .char_width = pm_encoding_windows_31j_char_width,