yarp 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/docs/configuration.md CHANGED
@@ -7,6 +7,7 @@ A lot of code in YARP's repository is templated from a single configuration file
7
7
  * `java/org/yarp/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java
8
8
  * `java/org/yarp/Loader.java` - for defining how to deserialize the nodes in Java
9
9
  * `java/org/yarp/Nodes.java` - for defining the nodes in Java
10
+ * `lib/yarp/mutation_visitor.rb` - for defining the mutation visitor for the nodes in Ruby
10
11
  * `lib/yarp/node.rb` - for defining the nodes in Ruby
11
12
  * `lib/yarp/serialize.rb` - for defining how to deserialize the nodes in Ruby
12
13
  * `src/node.c` - for defining how to free the nodes in C and calculate the size in memory in C
data/docs/encoding.md CHANGED
@@ -61,22 +61,22 @@ typedef struct {
61
61
  // Return the number of bytes that the next character takes if it is valid
62
62
  // in the encoding. Does not read more than n bytes. It is assumed that n is
63
63
  // at least 1.
64
- size_t (*char_width)(const char *c, ptrdiff_t n);
64
+ size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
65
65
 
66
66
  // Return the number of bytes that the next character takes if it is valid
67
67
  // in the encoding and is alphabetical. Does not read more than n bytes. It
68
68
  // is assumed that n is at least 1.
69
- size_t (*alpha_char)(const char *c, ptrdiff_t n);
69
+ size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
70
70
 
71
71
  // Return the number of bytes that the next character takes if it is valid
72
72
  // in the encoding and is alphanumeric. Does not read more than n bytes. It
73
73
  // is assumed that n is at least 1.
74
- size_t (*alnum_char)(const char *c, ptrdiff_t n);
74
+ size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
75
75
 
76
76
  // Return true if the next character is valid in the encoding and is an
77
77
  // uppercase character. Does not read more than n bytes. It is assumed that
78
78
  // n is at least 1.
79
- bool (*isupper_char)(const char *c, ptrdiff_t n);
79
+ bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
80
80
 
81
81
  // The name of the encoding. This should correspond to a value that can be
82
82
  // passed to Encoding.find in Ruby.
@@ -90,7 +90,7 @@ typedef struct {
90
90
  // the ability here to call out to a user-defined function to get an encoding
91
91
  // struct. If the function returns something that isn't NULL, we set that to
92
92
  // our encoding and use it to parse identifiers.
93
- typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const char *name, size_t width);
93
+ typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
94
94
 
95
95
  // Register a callback that will be called when YARP encounters a magic comment
96
96
  // with an encoding referenced that it doesn't understand. The callback should
data/docs/ruby_api.md CHANGED
@@ -20,4 +20,6 @@ The full API is documented below.
20
20
  * `YARP.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
21
21
  * `YARP.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
22
22
  * `YARP.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
23
+ * `YARP.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens
24
+ * `YARP.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens
23
25
  * `YARP.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree
data/docs/serialization.md CHANGED
@@ -73,7 +73,7 @@ The header is structured like the following table:
73
73
  | varint | content pool size |
74
74
 
75
75
  After the header comes the body of the serialized string.
76
- The body consistents of a sequence of nodes that is built using a prefix traversal order of the syntax tree.
76
+ The body consists of a sequence of nodes that is built using a prefix traversal order of the syntax tree.
77
77
  Each node is structured like the following table:
78
78
 
79
79
  | # bytes | field |
@@ -130,14 +130,14 @@ void yp_buffer_free(yp_buffer_t *);
130
130
 
131
131
  // Parse and serialize the AST represented by the given source to the given
132
132
  // buffer.
133
- void yp_parse_serialize(const char *, size_t, yp_buffer_t *, const char *);
133
+ void yp_parse_serialize(const uint8_t *source, size_t length, yp_buffer_t *buffer, const char *metadata);
134
134
  ```
135
135
 
136
136
  Typically you would use a stack-allocated `yp_buffer_t` and call `yp_parse_serialize`, as in:
137
137
 
138
138
  ```c
139
139
  void
140
- serialize(const char *source, size_t length) {
140
+ serialize(const uint8_t *source, size_t length) {
141
141
  yp_buffer_t buffer;
142
142
  if (!yp_buffer_init(&buffer)) return;
143
143
 
data/docs/testing.md CHANGED
@@ -12,9 +12,9 @@ These test specific YARP implementation details like comments, errors, and regul
12
12
 
13
13
  ### Snapshot tests
14
14
 
15
- Snapshot tests ensure that parsed output is equivalent to previous parsed output. There are many categorized examples of valid syntax within the `test/fixtures/` directory. When the test suite runs, it will parse all of this syntax, and compare it against corresponding files in the `test/snapshots/` directory. For example, `test/fixtures/strings.txt` has a corresponding `test/snapshots/strings.txt`.
15
+ Snapshot tests ensure that parsed output is equivalent to previous parsed output. There are many categorized examples of valid syntax within the `test/yarp/fixtures/` directory. When the test suite runs, it will parse all of this syntax, and compare it against corresponding files in the `test/yarp/snapshots/` directory. For example, `test/yarp/fixtures/strings.txt` has a corresponding `test/yarp/snapshots/strings.txt`.
16
16
 
17
- If the parsed files do not match, it will raise an error. If there is not a corresponding file in the `test/snapshots/` directory, one will be created so that it exists for the next test run.
17
+ If the parsed files do not match, it will raise an error. If there is not a corresponding file in the `test/yarp/snapshots/` directory, one will be created so that it exists for the next test run.
18
18
 
19
19
  ### Testing against repositories
20
20