yarp 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -1
  3. data/CONTRIBUTING.md +7 -0
  4. data/Makefile +5 -1
  5. data/config.yml +308 -166
  6. data/docs/configuration.md +0 -1
  7. data/docs/encoding.md +5 -5
  8. data/docs/mapping.md +91 -91
  9. data/docs/serialization.md +25 -22
  10. data/ext/yarp/api_node.c +1210 -483
  11. data/ext/yarp/extension.c +22 -8
  12. data/ext/yarp/extension.h +2 -2
  13. data/include/yarp/ast.h +692 -183
  14. data/include/yarp/defines.h +2 -1
  15. data/include/yarp/diagnostic.h +200 -3
  16. data/include/yarp/enc/yp_encoding.h +10 -10
  17. data/include/yarp/node.h +0 -4
  18. data/include/yarp/parser.h +19 -19
  19. data/include/yarp/regexp.h +1 -1
  20. data/include/yarp/unescape.h +4 -4
  21. data/include/yarp/util/yp_buffer.h +3 -0
  22. data/include/yarp/util/yp_char.h +16 -16
  23. data/include/yarp/util/yp_constant_pool.h +12 -5
  24. data/include/yarp/util/yp_newline_list.h +5 -5
  25. data/include/yarp/util/yp_string.h +4 -4
  26. data/include/yarp/util/yp_string_list.h +0 -3
  27. data/include/yarp/util/yp_strpbrk.h +1 -1
  28. data/include/yarp/version.h +2 -2
  29. data/include/yarp.h +5 -4
  30. data/lib/yarp/desugar_visitor.rb +59 -122
  31. data/lib/yarp/mutation_visitor.rb +22 -12
  32. data/lib/yarp/node.rb +3081 -501
  33. data/lib/yarp/parse_result/comments.rb +172 -0
  34. data/lib/yarp/parse_result/newlines.rb +60 -0
  35. data/lib/yarp/pattern.rb +239 -0
  36. data/lib/yarp/serialize.rb +152 -129
  37. data/lib/yarp.rb +109 -49
  38. data/src/diagnostic.c +254 -2
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1871 -1466
  47. data/src/prettyprint.c +463 -230
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +352 -184
  50. data/src/unescape.c +152 -122
  51. data/src/util/yp_buffer.c +7 -2
  52. data/src/util/yp_char.c +35 -40
  53. data/src/util/yp_constant_pool.c +45 -12
  54. data/src/util/yp_memchr.c +1 -1
  55. data/src/util/yp_newline_list.c +10 -5
  56. data/src/util/yp_string.c +22 -20
  57. data/src/util/yp_string_list.c +4 -7
  58. data/src/util/yp_strncasecmp.c +3 -6
  59. data/src/util/yp_strpbrk.c +8 -8
  60. data/src/yarp.c +1288 -1021
  61. data/yarp.gemspec +4 -1
  62. metadata +6 -3
@@ -51,7 +51,6 @@ The available values for `type` are:
51
51
  * `constant[]` - A child node that is an array of constants. This is a `yp_constant_id_list_t` in C.
52
52
  * `location` - A child node that is a location. This is a `yp_location_t` in C.
53
53
  * `location?` - A child node that is a location that is optionally present. This is a `yp_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
54
- * `location[]` - A child node that is an array of locations. This is a `yp_location_list_t` in C.
55
54
  * `uint32` - A child node that is a 32-bit unsigned integer. This is a `uint32_t` in C.
56
55
 
57
56
  If the type is `node` or `node?` then the value also accepts an optional `kind` key (a string). This key is expected to match the name of another node type within `config.yml`. This changes a couple of places where code is templated out to use the more specific struct name instead of the generic `yp_node_t`. For example, with `kind: StatementsNode` the `yp_node_t *` in C becomes a `yp_statements_node_t *`.
data/docs/encoding.md CHANGED
@@ -61,22 +61,22 @@ typedef struct {
61
61
  // Return the number of bytes that the next character takes if it is valid
62
62
  // in the encoding. Does not read more than n bytes. It is assumed that n is
63
63
  // at least 1.
64
- size_t (*char_width)(const char *c, ptrdiff_t n);
64
+ size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
65
65
 
66
66
  // Return the number of bytes that the next character takes if it is valid
67
67
  // in the encoding and is alphabetical. Does not read more than n bytes. It
68
68
  // is assumed that n is at least 1.
69
- size_t (*alpha_char)(const char *c, ptrdiff_t n);
69
+ size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
70
70
 
71
71
  // Return the number of bytes that the next character takes if it is valid
72
72
  // in the encoding and is alphanumeric. Does not read more than n bytes. It
73
73
  // is assumed that n is at least 1.
74
- size_t (*alnum_char)(const char *c, ptrdiff_t n);
74
+ size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
75
75
 
76
76
  // Return true if the next character is valid in the encoding and is an
77
77
  // uppercase character. Does not read more than n bytes. It is assumed that
78
78
  // n is at least 1.
79
- bool (*isupper_char)(const char *c, ptrdiff_t n);
79
+ bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
80
80
 
81
81
  // The name of the encoding. This should correspond to a value that can be
82
82
  // passed to Encoding.find in Ruby.
@@ -90,7 +90,7 @@ typedef struct {
90
90
  // the ability here to call out to a user-defined function to get an encoding
91
91
  // struct. If the function returns something that isn't NULL, we set that to
92
92
  // our encoding and use it to parse identifiers.
93
- typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const char *name, size_t width);
93
+ typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
94
94
 
95
95
  // Register a callback that will be called when YARP encounters a magic comment
96
96
  // with an encoding referenced that it doesn't understand. The callback should
data/docs/mapping.md CHANGED
@@ -10,108 +10,108 @@ The following table shows how the various CRuby nodes are mapped to YARP nodes.
10
10
  | --- | --- |
11
11
  | `NODE_SCOPE` | |
12
12
  | `NODE_BLOCK` | |
13
- | `NODE_IF` | `YP_NODE_IF_NODE` |
14
- | `NODE_UNLESS` | `YP_NODE_UNLESS_NODE` |
15
- | `NODE_CASE` | `YP_NODE_CASE_NODE` |
16
- | `NODE_CASE2` | `YP_NODE_CASE_NODE` (with a null predicate) |
13
+ | `NODE_IF` | `YP_IF_NODE` |
14
+ | `NODE_UNLESS` | `YP_UNLESS_NODE` |
15
+ | `NODE_CASE` | `YP_CASE_NODE` |
16
+ | `NODE_CASE2` | `YP_CASE_NODE` (with a null predicate) |
17
17
  | `NODE_CASE3` | |
18
- | `NODE_WHEN` | `YP_NODE_WHEN_NODE` |
19
- | `NODE_IN` | `YP_NODE_IN_NODE` |
20
- | `NODE_WHILE` | `YP_NODE_WHILE_NODE` |
21
- | `NODE_UNTIL` | `YP_NODE_UNTIL_NODE` |
22
- | `NODE_ITER` | `YP_NODE_CALL_NODE` (with a non-null block) |
23
- | `NODE_FOR` | `YP_NODE_FOR_NODE` |
24
- | `NODE_FOR_MASGN` | `YP_NODE_FOR_NODE` (with a multi-write node as the index) |
25
- | `NODE_BREAK` | `YP_NODE_BREAK_NODE` |
26
- | `NODE_NEXT` | `YP_NODE_NEXT_NODE` |
27
- | `NODE_REDO` | `YP_NODE_REDO_NODE` |
28
- | `NODE_RETRY` | `YP_NODE_RETRY_NODE` |
29
- | `NODE_BEGIN` | `YP_NODE_BEGIN_NODE` |
30
- | `NODE_RESCUE` | `YP_NODE_RESCUE_NODE` |
18
+ | `NODE_WHEN` | `YP_WHEN_NODE` |
19
+ | `NODE_IN` | `YP_IN_NODE` |
20
+ | `NODE_WHILE` | `YP_WHILE_NODE` |
21
+ | `NODE_UNTIL` | `YP_UNTIL_NODE` |
22
+ | `NODE_ITER` | `YP_CALL_NODE` (with a non-null block) |
23
+ | `NODE_FOR` | `YP_FOR_NODE` |
24
+ | `NODE_FOR_MASGN` | `YP_FOR_NODE` (with a multi-write node as the index) |
25
+ | `NODE_BREAK` | `YP_BREAK_NODE` |
26
+ | `NODE_NEXT` | `YP_NEXT_NODE` |
27
+ | `NODE_REDO` | `YP_REDO_NODE` |
28
+ | `NODE_RETRY` | `YP_RETRY_NODE` |
29
+ | `NODE_BEGIN` | `YP_BEGIN_NODE` |
30
+ | `NODE_RESCUE` | `YP_RESCUE_NODE` |
31
31
  | `NODE_RESBODY` | |
32
- | `NODE_ENSURE` | `YP_NODE_ENSURE_NODE` |
33
- | `NODE_AND` | `YP_NODE_AND_NODE` |
34
- | `NODE_OR` | `YP_NODE_OR_NODE` |
35
- | `NODE_MASGN` | `YP_NODE_MULTI_WRITE_NODE` |
36
- | `NODE_LASGN` | `YP_NODE_LOCAL_VARIABLE_WRITE_NODE` |
37
- | `NODE_DASGN` | `YP_NODE_LOCAL_VARIABLE_WRITE_NODE` |
38
- | `NODE_GASGN` | `YP_NODE_GLOBAL_VARIABLE_WRITE_NODE` |
39
- | `NODE_IASGN` | `YP_NODE_INSTANCE_VARIABLE_WRITE_NODE` |
40
- | `NODE_CDECL` | `YP_NODE_CONSTANT_PATH_WRITE_NODE` |
41
- | `NODE_CVASGN` | `YP_NODE_CLASS_VARIABLE_WRITE_NODE` |
32
+ | `NODE_ENSURE` | `YP_ENSURE_NODE` |
33
+ | `NODE_AND` | `YP_AND_NODE` |
34
+ | `NODE_OR` | `YP_OR_NODE` |
35
+ | `NODE_MASGN` | `YP_MULTI_WRITE_NODE` |
36
+ | `NODE_LASGN` | `YP_LOCAL_VARIABLE_WRITE_NODE` |
37
+ | `NODE_DASGN` | `YP_LOCAL_VARIABLE_WRITE_NODE` |
38
+ | `NODE_GASGN` | `YP_GLOBAL_VARIABLE_WRITE_NODE` |
39
+ | `NODE_IASGN` | `YP_INSTANCE_VARIABLE_WRITE_NODE` |
40
+ | `NODE_CDECL` | `YP_CONSTANT_PATH_WRITE_NODE` |
41
+ | `NODE_CVASGN` | `YP_CLASS_VARIABLE_WRITE_NODE` |
42
42
  | `NODE_OP_ASGN1` | |
43
43
  | `NODE_OP_ASGN2` | |
44
- | `NODE_OP_ASGN_AND` | `YP_NODE_OPERATOR_AND_ASSIGNMENT_NODE` |
45
- | `NODE_OP_ASGN_OR` | `YP_NODE_OPERATOR_OR_ASSIGNMENT_NODE` |
44
+ | `NODE_OP_ASGN_AND` | `YP_OPERATOR_AND_ASSIGNMENT_NODE` |
45
+ | `NODE_OP_ASGN_OR` | `YP_OPERATOR_OR_ASSIGNMENT_NODE` |
46
46
  | `NODE_OP_CDECL` | |
47
- | `NODE_CALL` | `YP_NODE_CALL_NODE` |
48
- | `NODE_OPCALL` | `YP_NODE_CALL_NODE` (with an operator as the method) |
49
- | `NODE_FCALL` | `YP_NODE_CALL_NODE` (with a null receiver and parentheses) |
50
- | `NODE_VCALL` | `YP_NODE_CALL_NODE` (with a null receiver and parentheses or arguments) |
51
- | `NODE_QCALL` | `YP_NODE_CALL_NODE` (with a &. operator) |
52
- | `NODE_SUPER` | `YP_NODE_SUPER_NODE` |
53
- | `NODE_ZSUPER` | `YP_NODE_FORWARDING_SUPER_NODE` |
54
- | `NODE_LIST` | `YP_NODE_ARRAY_NODE` |
55
- | `NODE_ZLIST` | `YP_NODE_ARRAY_NODE` (with no child elements) |
56
- | `NODE_VALUES` | `YP_NODE_ARGUMENTS_NODE` |
57
- | `NODE_HASH` | `YP_NODE_HASH_NODE` |
58
- | `NODE_RETURN` | `YP_NODE_RETURN_NODE` |
59
- | `NODE_YIELD` | `YP_NODE_YIELD_NODE` |
60
- | `NODE_LVAR` | `YP_NODE_LOCAL_VARIABLE_READ_NODE` |
61
- | `NODE_DVAR` | `YP_NODE_LOCAL_VARIABLE_READ_NODE` |
62
- | `NODE_GVAR` | `YP_NODE_GLOBAL_VARIABLE_READ_NODE` |
63
- | `NODE_IVAR` | `YP_NODE_INSTANCE_VARIABLE_READ_NODE` |
64
- | `NODE_CONST` | `YP_NODE_CONSTANT_PATH_READ_NODE` |
65
- | `NODE_CVAR` | `YP_NODE_CLASS_VARIABLE_READ_NODE` |
66
- | `NODE_NTH_REF` | `YP_NODE_NUMBERED_REFERENCE_READ_NODE` |
67
- | `NODE_BACK_REF` | `YP_NODE_BACK_REFERENCE_READ_NODE` |
47
+ | `NODE_CALL` | `YP_CALL_NODE` |
48
+ | `NODE_OPCALL` | `YP_CALL_NODE` (with an operator as the method) |
49
+ | `NODE_FCALL` | `YP_CALL_NODE` (with a null receiver and parentheses) |
50
+ | `NODE_VCALL` | `YP_CALL_NODE` (with a null receiver and parentheses or arguments) |
51
+ | `NODE_QCALL` | `YP_CALL_NODE` (with a &. operator) |
52
+ | `NODE_SUPER` | `YP_SUPER_NODE` |
53
+ | `NODE_ZSUPER` | `YP_FORWARDING_SUPER_NODE` |
54
+ | `NODE_LIST` | `YP_ARRAY_NODE` |
55
+ | `NODE_ZLIST` | `YP_ARRAY_NODE` (with no child elements) |
56
+ | `NODE_VALUES` | `YP_ARGUMENTS_NODE` |
57
+ | `NODE_HASH` | `YP_HASH_NODE` |
58
+ | `NODE_RETURN` | `YP_RETURN_NODE` |
59
+ | `NODE_YIELD` | `YP_YIELD_NODE` |
60
+ | `NODE_LVAR` | `YP_LOCAL_VARIABLE_READ_NODE` |
61
+ | `NODE_DVAR` | `YP_LOCAL_VARIABLE_READ_NODE` |
62
+ | `NODE_GVAR` | `YP_GLOBAL_VARIABLE_READ_NODE` |
63
+ | `NODE_IVAR` | `YP_INSTANCE_VARIABLE_READ_NODE` |
64
+ | `NODE_CONST` | `YP_CONSTANT_PATH_READ_NODE` |
65
+ | `NODE_CVAR` | `YP_CLASS_VARIABLE_READ_NODE` |
66
+ | `NODE_NTH_REF` | `YP_NUMBERED_REFERENCE_READ_NODE` |
67
+ | `NODE_BACK_REF` | `YP_BACK_REFERENCE_READ_NODE` |
68
68
  | `NODE_MATCH` | |
69
- | `NODE_MATCH2` | `YP_NODE_CALL_NODE` (with regular expression as receiver) |
70
- | `NODE_MATCH3` | `YP_NODE_CALL_NODE` (with regular expression as only argument) |
69
+ | `NODE_MATCH2` | `YP_CALL_NODE` (with regular expression as receiver) |
70
+ | `NODE_MATCH3` | `YP_CALL_NODE` (with regular expression as only argument) |
71
71
  | `NODE_LIT` | |
72
- | `NODE_STR` | `YP_NODE_STRING_NODE` |
73
- | `NODE_DSTR` | `YP_NODE_INTERPOLATED_STRING_NODE` |
74
- | `NODE_XSTR` | `YP_NODE_X_STRING_NODE` |
75
- | `NODE_DXSTR` | `YP_NODE_INTERPOLATED_X_STRING_NODE` |
76
- | `NODE_EVSTR` | `YP_NODE_STRING_INTERPOLATED_NODE` |
77
- | `NODE_DREGX` | `YP_NODE_INTERPOLATED_REGULAR_EXPRESSION_NODE` |
72
+ | `NODE_STR` | `YP_STRING_NODE` |
73
+ | `NODE_DSTR` | `YP_INTERPOLATED_STRING_NODE` |
74
+ | `NODE_XSTR` | `YP_X_STRING_NODE` |
75
+ | `NODE_DXSTR` | `YP_INTERPOLATED_X_STRING_NODE` |
76
+ | `NODE_EVSTR` | `YP_STRING_INTERPOLATED_NODE` |
77
+ | `NODE_DREGX` | `YP_INTERPOLATED_REGULAR_EXPRESSION_NODE` |
78
78
  | `NODE_ONCE` | |
79
- | `NODE_ARGS` | `YP_NODE_PARAMETERS_NODE` |
79
+ | `NODE_ARGS` | `YP_PARAMETERS_NODE` |
80
80
  | `NODE_ARGS_AUX` | |
81
- | `NODE_OPT_ARG` | `YP_NODE_OPTIONAL_PARAMETER_NODE` |
82
- | `NODE_KW_ARG` | `YP_NODE_KEYWORD_PARAMETER_NODE` |
83
- | `NODE_POSTARG` | `YP_NODE_REQUIRED_PARAMETER_NODE` |
81
+ | `NODE_OPT_ARG` | `YP_OPTIONAL_PARAMETER_NODE` |
82
+ | `NODE_KW_ARG` | `YP_KEYWORD_PARAMETER_NODE` |
83
+ | `NODE_POSTARG` | `YP_REQUIRED_PARAMETER_NODE` |
84
84
  | `NODE_ARGSCAT` | |
85
85
  | `NODE_ARGSPUSH` | |
86
- | `NODE_SPLAT` | `YP_NODE_SPLAT_NODE` |
87
- | `NODE_BLOCK_PASS` | `YP_NODE_BLOCK_ARGUMENT_NODE` |
88
- | `NODE_DEFN` | `YP_NODE_DEF_NODE` (with a null receiver) |
89
- | `NODE_DEFS` | `YP_NODE_DEF_NODE` (with a non-null receiver) |
90
- | `NODE_ALIAS` | `YP_NODE_ALIAS_NODE` |
91
- | `NODE_VALIAS` | `YP_NODE_ALIAS_NODE` (with a global variable first argument) |
92
- | `NODE_UNDEF` | `YP_NODE_UNDEF_NODE` |
93
- | `NODE_CLASS` | `YP_NODE_CLASS_NODE` |
94
- | `NODE_MODULE` | `YP_NODE_MODULE_NODE` |
95
- | `NODE_SCLASS` | `YP_NODE_S_CLASS_NODE` |
96
- | `NODE_COLON2` | `YP_NODE_CONSTANT_PATH_NODE` |
97
- | `NODE_COLON3` | `YP_NODE_CONSTANT_PATH_NODE` (with a null receiver) |
98
- | `NODE_DOT2` | `YP_NODE_RANGE_NODE` (with a .. operator) |
99
- | `NODE_DOT3` | `YP_NODE_RANGE_NODE` (with a ... operator) |
100
- | `NODE_FLIP2` | `YP_NODE_RANGE_NODE` (with a .. operator) |
101
- | `NODE_FLIP3` | `YP_NODE_RANGE_NODE` (with a ... operator) |
102
- | `NODE_SELF` | `YP_NODE_SELF_NODE` |
103
- | `NODE_NIL` | `YP_NODE_NIL_NODE` |
104
- | `NODE_TRUE` | `YP_NODE_TRUE_NODE` |
105
- | `NODE_FALSE` | `YP_NODE_FALSE_NODE` |
86
+ | `NODE_SPLAT` | `YP_SPLAT_NODE` |
87
+ | `NODE_BLOCK_PASS` | `YP_BLOCK_ARGUMENT_NODE` |
88
+ | `NODE_DEFN` | `YP_DEF_NODE` (with a null receiver) |
89
+ | `NODE_DEFS` | `YP_DEF_NODE` (with a non-null receiver) |
90
+ | `NODE_ALIAS` | `YP_ALIAS_NODE` |
91
+ | `NODE_VALIAS` | `YP_ALIAS_NODE` (with a global variable first argument) |
92
+ | `NODE_UNDEF` | `YP_UNDEF_NODE` |
93
+ | `NODE_CLASS` | `YP_CLASS_NODE` |
94
+ | `NODE_MODULE` | `YP_MODULE_NODE` |
95
+ | `NODE_SCLASS` | `YP_S_CLASS_NODE` |
96
+ | `NODE_COLON2` | `YP_CONSTANT_PATH_NODE` |
97
+ | `NODE_COLON3` | `YP_CONSTANT_PATH_NODE` (with a null receiver) |
98
+ | `NODE_DOT2` | `YP_RANGE_NODE` (with a .. operator) |
99
+ | `NODE_DOT3` | `YP_RANGE_NODE` (with a ... operator) |
100
+ | `NODE_FLIP2` | `YP_RANGE_NODE` (with a .. operator) |
101
+ | `NODE_FLIP3` | `YP_RANGE_NODE` (with a ... operator) |
102
+ | `NODE_SELF` | `YP_SELF_NODE` |
103
+ | `NODE_NIL` | `YP_NIL_NODE` |
104
+ | `NODE_TRUE` | `YP_TRUE_NODE` |
105
+ | `NODE_FALSE` | `YP_FALSE_NODE` |
106
106
  | `NODE_ERRINFO` | |
107
- | `NODE_DEFINED` | `YP_NODE_DEFINED_NODE` |
108
- | `NODE_POSTEXE` | `YP_NODE_POST_EXECUTION_NODE` |
109
- | `NODE_DSYM` | `YP_NODE_INTERPOLATED_SYMBOL_NODE` |
110
- | `NODE_ATTRASGN` | `YP_NODE_CALL_NODE` (with a message that ends with =) |
111
- | `NODE_LAMBDA` | `YP_NODE_LAMBDA_NODE` |
112
- | `NODE_ARYPTN` | `YP_NODE_ARRAY_PATTERN_NODE` |
113
- | `NODE_HSHPTN` | `YP_NODE_HASH_PATTERN_NODE` |
114
- | `NODE_FNDPTN` | `YP_NODE_FIND_PATTERN_NODE` |
115
- | `NODE_ERROR` | `YP_NODE_MISSING_NODE` |
107
+ | `NODE_DEFINED` | `YP_DEFINED_NODE` |
108
+ | `NODE_POSTEXE` | `YP_POST_EXECUTION_NODE` |
109
+ | `NODE_DSYM` | `YP_INTERPOLATED_SYMBOL_NODE` |
110
+ | `NODE_ATTRASGN` | `YP_CALL_NODE` (with a message that ends with =) |
111
+ | `NODE_LAMBDA` | `YP_LAMBDA_NODE` |
112
+ | `NODE_ARYPTN` | `YP_ARRAY_PATTERN_NODE` |
113
+ | `NODE_HSHPTN` | `YP_HASH_PATTERN_NODE` |
114
+ | `NODE_FNDPTN` | `YP_FIND_PATTERN_NODE` |
115
+ | `NODE_ERROR` | `YP_MISSING_NODE` |
116
116
  | `NODE_LAST` | |
117
117
  ```
@@ -81,32 +81,35 @@ Each node is structured like the following table:
81
81
  | `1` | node type |
82
82
  | location | node location |
83
83
 
84
- Each node's child is then appended to the serialized string.
85
- The child node types can be determined by referencing `config.yml`.
86
- Depending on the type of child node, it could take a couple of different forms, described below:
87
-
88
- * `node` - A child node that is a node itself. This is structured just as like parent node.
89
- * `node?` - A child node that is optionally present. If the node is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just as like parent node.
90
- * `node[]` - A child node that is an array of nodes. This is structured as a variable-length integer length, followed by the child nodes themselves.
91
- * `string` - A child node that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is structured as a variable-length integer byte length, followed by the string itself (_without_ a trailing null byte).
84
+ Every field on the node is then appended to the serialized string. The fields can be determined by referencing `config.yml`. Depending on the type of field, it could take a couple of different forms, described below:
85
+
86
+ * `node` - A field that is a node. This is structured just like the parent node.
87
+ * `node?` - A field that is a node that is optionally present. If the node is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the parent node.
88
+ * `node[]` - A field that is an array of nodes. This is structured as a variable-length integer length, followed by the child nodes themselves.
89
+ * `string` - A field that is a string. For example, this is used as the name of the method in a call node, since it cannot directly reference the source string (as in `@-` or `foo=`). This is structured as a variable-length integer byte length, followed by the string itself (_without_ a trailing null byte).
92
90
  * `constant` - A variable-length integer that represents an index in the constant pool.
93
- * `constant[]` - A child node that is an array of constants. This is structured as a variable-length integer length, followed by the child constants themselves.
94
- * `location` - A child node that is a location. This is structured as a variable-length integer start followed by a variable-length integer length.
95
- * `location?` - A child node that is a location that is optionally present. If the location is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the `location` child node.
96
- * `location[]` - A child node that is an array of locations. This is structured as a `4` byte length, followed by the locations themselves.
97
- * `uint32` - A child node that is a 32-bit unsigned integer. This is structured as a variable-length integer.
91
+ * `constant?` - An optional variable-length integer that represents an index in the constant pool. If it's not present, then a single `0` byte will be written in its place.
92
+ * `location` - A field that is a location. This is structured as a variable-length integer start followed by a variable-length integer length.
93
+ * `location?` - A field that is a location that is optionally present. If the location is not present, then a single `0` byte will be written in its place. If it is present, then it will be structured just like the `location` field.
94
+ * `uint32` - A field that is a 32-bit unsigned integer. This is structured as a variable-length integer.
95
+
96
+ After the syntax tree, the constant pool is serialized. This is a list of constants that were referenced from within the tree. The constant pool begins at the offset specified in the header. Constants can be either "owned" (in which case their contents are embedded in the serialization) or "shared" (in which case their contents represent a slice of the source string). The most significant bit of the constant indicates whether it is owned or shared.
97
+
98
+ In the case that it is owned, the constant is structured as follows:
99
+
100
+ | # bytes | field |
101
+ | --- | --- |
102
+ | `4` | the byte offset in the serialization for the contents of the constant |
103
+ | `4` | the byte length in the serialization |
98
104
 
99
- After the syntax tree, the content pool is serialized.
100
- This is a list of constants that were referenced from within the tree.
101
- The content pool begins at the offset specified in the header.
102
- Each constant is structured as:
105
+ Note that you will need to mask off the most significant bit for the byte offset in the serialization. In the case that it is shared, the constant is structured as follows:
103
106
 
104
107
  | # bytes | field |
105
108
  | --- | --- |
106
- | `4` | the byte offset in the source |
107
- | `4` | the byte length in the source |
109
+ | `4` | the byte offset in the source string for the contents of the constant |
110
+ | `4` | the byte length in the source string |
108
111
 
109
- At the end of the serialization, the buffer is null terminated.
112
+ After the constant pool, the contents of the owned constants are serialized. This is just a sequence of bytes that represent the contents of the constants. At the end of the serialization, the buffer is null terminated.
110
113
 
111
114
  ## APIs
112
115
 
@@ -130,14 +133,14 @@ void yp_buffer_free(yp_buffer_t *);
130
133
 
131
134
  // Parse and serialize the AST represented by the given source to the given
132
135
  // buffer.
133
- void yp_parse_serialize(const char *, size_t, yp_buffer_t *, const char *);
136
+ void yp_parse_serialize(const uint8_t *source, size_t length, yp_buffer_t *buffer, const char *metadata);
134
137
  ```
135
138
 
136
139
  Typically you would use a stack-allocated `yp_buffer_t` and call `yp_parse_serialize`, as in:
137
140
 
138
141
  ```c
139
142
  void
140
- serialize(const char *source, size_t length) {
143
+ serialize(const uint8_t *source, size_t length) {
141
144
  yp_buffer_t buffer;
142
145
  if (!yp_buffer_init(&buffer)) return;
143
146