yarp 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,67 @@
1
+ // This struct represents an abstract linked list that provides common
2
+ // functionality. It is meant to be used any time a linked list is necessary to
3
+ // store data.
4
+ //
5
+ // The linked list itself operates off a set of pointers. Because the nodes are
6
+ // reached through pointers rather than stored sequentially, they can be of any
7
+ // size. We use this fact to allow the consumer of this linked list to extend
8
+ // the node struct to include any data they want. This is done by using the
9
+ // yp_list_node_t as the first member of the struct.
10
+ //
11
+ // For example, if we want to store a list of integers, we can do the following:
12
+ //
13
+ // typedef struct {
14
+ // yp_list_node_t node;
15
+ // int value;
16
+ // } yp_int_node_t;
17
+ //
18
+ // yp_list_t list;
19
+ // yp_list_init(&list);
20
+ //
21
+ // yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
22
+ // node->value = 5;
23
+ //
24
+ // yp_list_append(&list, &node->node);
25
+ //
26
+ // The yp_list_t struct is used to represent the overall linked list. It
27
+ // contains a pointer to the head and tail of the list. This allows for easy
28
+ // iteration and appending of new nodes.
29
+
30
+ #ifndef YARP_LIST_H
31
+ #define YARP_LIST_H
32
+
33
+ #include "yarp/defines.h"
34
+
35
+ #include <stdbool.h>
36
+ #include <stddef.h>
37
+ #include <stdint.h>
38
+ #include <stdlib.h>
39
+
40
+ // This represents a node in the linked list. Consumers embed it as the first member of their own struct to attach data.
41
+ typedef struct yp_list_node {
42
+ struct yp_list_node *next; // Link to the following node in the list.
43
+ } yp_list_node_t;
44
+
45
+ // This represents the overall linked list. It keeps a pointer to the head and
46
+ // tail so that iteration is easy and pushing new nodes is easy.
47
+ typedef struct {
48
+ yp_list_node_t *head; // Where iteration starts. NOTE(review): presumably NULL for an empty list — confirm in yp_list_init.
49
+ yp_list_node_t *tail; // NOTE(review): presumably where yp_list_append attaches new nodes — confirm in yp_list.c.
50
+ } yp_list_t;
51
+
52
+ // Initializes a new list.
53
+ YP_EXPORTED_FUNCTION void yp_list_init(yp_list_t *list);
54
+
55
+ // Returns true if the given list is empty.
56
+ YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
57
+
58
+ // Returns the size of the list in O(n) time.
59
+ YP_EXPORTED_FUNCTION uint32_t yp_list_size(yp_list_t *list);
60
+
61
+ // Append a node to the given list.
62
+ void yp_list_append(yp_list_t *list, yp_list_node_t *node);
63
+
64
+ // Deallocate the internal state of the given list.
65
+ YP_EXPORTED_FUNCTION void yp_list_free(yp_list_t *list);
66
+
67
+ #endif
@@ -0,0 +1,14 @@
1
+ #ifndef YP_MEMCHR_H
2
+ #define YP_MEMCHR_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/enc/yp_encoding.h"
6
+
7
+ #include <stddef.h>
8
+
9
+ // We need to roll our own memchr to handle cases where the encoding changes and
10
+ // we need to search for a character in a buffer that could be the trailing byte
11
+ // of a multibyte character.
12
+ void * yp_memchr(const void *source, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding);
13
+
14
+ #endif
@@ -0,0 +1,54 @@
1
+ // When compiling the syntax tree, it's necessary to know the line and column
2
+ // of many nodes. This is necessary to support things like error messages,
3
+ // tracepoints, etc.
4
+ //
5
+ // It's possible that we could store the start line, start column, end line, and
6
+ // end column on every node in addition to the offsets that we already store,
7
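+ // but that would be quite a lot of memory overhead. Instead, this list records the offsets of newlines so that a line and column can be looked up from an offset on demand.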
8
+
9
+ #ifndef YP_NEWLINE_LIST_H
10
+ #define YP_NEWLINE_LIST_H
11
+
12
+ #include "yarp/defines.h"
13
+
14
+ #include <assert.h>
15
+ #include <stdbool.h>
16
+ #include <stddef.h>
17
+ #include <stdlib.h>
18
+
19
+ // A list of offsets of newlines in a string. The offsets are assumed to be
20
+ // sorted/inserted in ascending order.
21
+ typedef struct {
22
+ const char *start; // NOTE(review): presumably the start of the source string that offsets are measured from — confirm in yp_newline_list.c.
23
+
24
+ size_t *offsets; // The recorded newline offsets, assumed to be in ascending order.
25
+ size_t size; // NOTE(review): presumably the number of offsets currently stored — TODO confirm.
26
+ size_t capacity; // NOTE(review): presumably the number of offsets the allocation can hold — TODO confirm.
27
+
28
+ size_t last_offset; // NOTE(review): looks like a cache of the most recently handled offset — confirm.
29
+ size_t last_index; // NOTE(review): looks like the index into offsets paired with last_offset — confirm.
30
+ } yp_newline_list_t;
31
+
32
+ // A line and column in a string.
33
+ typedef struct {
34
+ size_t line; // Line component. NOTE(review): whether this is 0- or 1-based is not visible in this header — confirm.
35
+ size_t column; // Column component. NOTE(review): unit (bytes vs. characters) is not visible in this header — confirm.
36
+ } yp_line_column_t;
37
+
38
+ // Initialize a new newline list with the given capacity. Returns true if the
39
+ // allocation of the offsets succeeds, otherwise returns false.
40
+ bool yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity);
41
+
42
+ // Append a new offset to the newline list. Returns true if the reallocation of
43
+ // the offsets succeeds (if one was necessary), otherwise returns false.
44
+ bool yp_newline_list_append(yp_newline_list_t *list, const char *cursor);
45
+
46
+ // Returns the line and column of the given offset. If the offset is not in the
47
+ // list, the line and column of the closest offset less than the given offset
48
+ // are returned.
49
+ yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor);
50
+
51
+ // Free the internal memory allocated for the newline list.
52
+ void yp_newline_list_free(yp_newline_list_t *list);
53
+
54
+ #endif
@@ -0,0 +1,24 @@
1
+ #ifndef YP_STATE_STACK_H
2
+ #define YP_STATE_STACK_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <stdbool.h>
7
+ #include <stdint.h>
8
+
9
+ // A stack of bools, represented as a plain uint32_t (not a struct).
10
+ typedef uint32_t yp_state_stack_t; // NOTE(review): presumably one bit per entry, which would cap the depth at 32 — confirm in yp_state_stack.c.
11
+
12
+ // Initializes the state stack to an empty stack.
13
+ void yp_state_stack_init(yp_state_stack_t *stack);
14
+
15
+ // Pushes a value onto the stack.
16
+ void yp_state_stack_push(yp_state_stack_t *stack, bool value);
17
+
18
+ // Pops a value off the stack.
19
+ void yp_state_stack_pop(yp_state_stack_t *stack);
20
+
21
+ // Returns the value at the top of the stack.
22
+ bool yp_state_stack_p(yp_state_stack_t *stack);
23
+
24
+ #endif
@@ -0,0 +1,57 @@
1
+ #ifndef YARP_STRING_H
2
+ #define YARP_STRING_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdbool.h>
8
+ #include <stddef.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+
12
+ // This struct represents a string value.
13
+ typedef struct {
14
+ enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type; // Kind of string; the variants line up with the yp_string_*_init functions (shared, owned, constant, mapped).
15
+ char *source; // Pointer to the string's contents. NOTE(review): presumably what yp_string_source returns — confirm.
16
+ size_t length; // Length of the string. NOTE(review): presumably in bytes and what yp_string_length returns — confirm.
17
+ } yp_string_t;
18
+
19
+ #define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
20
+
21
+ // Initialize a shared string that is based on initial input.
22
+ void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);
23
+
24
+ // Initialize an owned string that is responsible for freeing allocated memory.
25
+ void yp_string_owned_init(yp_string_t *string, char *source, size_t length);
26
+
27
+ // Initialize a constant string that doesn't own its memory source.
28
+ void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
29
+
30
+ // Read the file indicated by the filepath parameter into source and load its
31
+ // contents and size into the given yp_string_t.
32
+ // The given yp_string_t should be freed using yp_string_free() when it is no longer used.
33
+ //
34
+ // We want to use demand paging as much as possible in order to avoid having to
35
+ // read the entire file into memory (which could be detrimental to performance
36
+ // for large files). This means that if we're on Windows we'll use
37
+ // `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
38
+ // `mmap`, and on other POSIX systems we'll use `read`.
39
+ bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
40
+
41
+ // Returns the memory size associated with the string.
42
+ size_t yp_string_memsize(const yp_string_t *string);
43
+
44
+ // Ensure the string is owned. If it is not, then reinitialize it as owned and
45
+ // copy over the previous source.
46
+ void yp_string_ensure_owned(yp_string_t *string);
47
+
48
+ // Returns the length associated with the string.
49
+ YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
50
+
51
+ // Returns the start pointer associated with the string.
52
+ YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
53
+
54
+ // Free the associated memory of the given string.
55
+ YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
56
+
57
+ #endif // YARP_STRING_H
@@ -0,0 +1,28 @@
1
+ #ifndef YARP_STRING_LIST_H
2
+ #define YARP_STRING_LIST_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/util/yp_string.h"
6
+
7
+ #include <stddef.h>
8
+ #include <stdlib.h>
9
+
10
+ typedef struct { // A list of yp_string_t values that can be appended to via yp_string_list_append.
11
+ yp_string_t *strings; // The stored strings.
12
+ size_t length; // NOTE(review): presumably the number of strings currently stored — TODO confirm.
13
+ size_t capacity; // NOTE(review): presumably the number of strings the allocation can hold — TODO confirm.
14
+ } yp_string_list_t;
15
+
16
+ // Allocate a new yp_string_list_t.
17
+ yp_string_list_t * yp_string_list_alloc(void);
18
+
19
+ // Initialize a yp_string_list_t with its default values.
20
+ YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
21
+
22
+ // Append a yp_string_t to the given string list.
23
+ void yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string);
24
+
25
+ // Free the memory associated with the string list.
26
+ YP_EXPORTED_FUNCTION void yp_string_list_free(yp_string_list_t *string_list);
27
+
28
+ #endif
@@ -0,0 +1,29 @@
1
+ #ifndef YP_STRPBRK_H
2
+ #define YP_STRPBRK_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/parser.h"
6
+
7
+ #include <stddef.h>
8
+ #include <string.h>
9
+
10
+ // Here we have rolled our own version of strpbrk. The standard library strpbrk
11
+ // has undefined behavior when the source string is not null-terminated. We want
12
+ // to support strings that are not null-terminated because yp_parse does not
13
+ // have the contract that the string is null-terminated. (This is desirable
14
+ // because it means the extension can call yp_parse with the result of a call to
15
+ // mmap).
16
+ //
17
+ // The standard library strpbrk also does not support passing a maximum length
18
+ // to search. We want to support this for the reason mentioned above, but we
19
+ // also don't want it to stop on null bytes. Ruby actually allows null bytes
20
+ // within strings, comments, regular expressions, etc. So we need to be able to
21
+ // skip past them.
22
+ //
23
+ // Finally, we want to support encodings wherein the charset could contain
24
+ // characters that are trailing bytes of multi-byte characters. For example, in
25
+ // Shift-JIS, the backslash character can be a trailing byte. In that case we
26
+ // need to take a slower path and iterate one multi-byte character at a time.
27
+ const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
28
+
29
+ #endif
@@ -0,0 +1,5 @@
1
+ #define YP_VERSION_MAJOR 0
2
+ #define YP_VERSION_MINOR 6
3
+ #define YP_VERSION_PATCH 0
4
+
5
+ #define YP_VERSION "0.6.0" // String form of MAJOR.MINOR.PATCH above; keep the two in sync.
data/include/yarp.h ADDED
@@ -0,0 +1,69 @@
1
+ #ifndef YARP_H
2
+ #define YARP_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/ast.h"
6
+ #include "yarp/diagnostic.h"
7
+ #include "yarp/node.h"
8
+ #include "yarp/pack.h"
9
+ #include "yarp/parser.h"
10
+ #include "yarp/regexp.h"
11
+ #include "yarp/unescape.h"
12
+ #include "yarp/util/yp_buffer.h"
13
+ #include "yarp/util/yp_char.h"
14
+ #include "yarp/util/yp_memchr.h"
15
+ #include "yarp/util/yp_strpbrk.h"
16
+
17
+ #include <assert.h>
18
+ #include <stdarg.h>
19
+ #include <stdbool.h>
20
+ #include <stdint.h>
21
+ #include <stdio.h>
22
+ #include <stdlib.h>
23
+ #include <string.h>
24
+
25
+ #ifndef _WIN32
26
+ #include <strings.h>
27
+ #endif
28
+
29
+ void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
30
+
31
+ void yp_print_node(yp_parser_t *parser, yp_node_t *node);
32
+
33
+ // The YARP version and the serialization format.
34
+ YP_EXPORTED_FUNCTION const char * yp_version(void);
35
+
36
+ // Initialize a parser with the given source pointer, source size, and filepath.
37
+ YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath);
38
+
39
+ // Register a callback that will be called whenever YARP changes the encoding it
40
+ // is using to parse based on the magic comment.
41
+ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_changed_callback(yp_parser_t *parser, yp_encoding_changed_callback_t callback);
42
+
43
+ // Register a callback that will be called when YARP encounters a magic comment
44
+ // with an encoding referenced that it doesn't understand. The callback should
45
+ // return NULL if it also doesn't understand the encoding or it should return a
46
+ // pointer to a yp_encoding_t struct that contains the functions necessary to
47
+ // parse identifiers.
48
+ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_t *parser, yp_encoding_decode_callback_t callback);
49
+
50
+ // Free any memory associated with the given parser.
51
+ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
52
+
53
+ // Parse the Ruby source associated with the given parser and return the tree.
54
+ YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
55
+
56
+ // Pretty-prints the AST represented by the given node to the given buffer.
57
+ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
58
+
59
+ // Serialize the AST represented by the given node to the given buffer.
60
+ YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
61
+
62
+ // Parse and serialize the AST represented by the given source to the given
63
+ // buffer.
64
+ YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
65
+
66
+ // Returns a string representation of the given token type.
67
+ YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
68
+
69
+ #endif