yarp 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,67 @@
1
+ // This struct represents an abstract linked list that provides common
2
+ // functionality. It is meant to be used any time a linked list is necessary to
3
+ // store data.
4
+ //
5
+ // The linked list itself operates off a set of pointers. Because the nodes
6
+ // are not necessarily stored sequentially, they can be of any size. We use this fact to
7
+ // allow the consumer of this linked list to extend the node struct to include
8
+ // any data they want. This is done by using the yp_list_node_t as the first
9
+ // member of the struct.
10
+ //
11
+ // For example, if we want to store a list of integers, we can do the following:
12
+ //
13
+ // typedef struct {
14
+ // yp_list_node_t node;
15
+ // int value;
16
+ // } yp_int_node_t;
17
+ //
18
+ // yp_list_t list;
19
+ // yp_list_init(&list);
20
+ //
21
+ // yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
22
+ // node->value = 5;
23
+ //
24
+ // yp_list_append(&list, &node->node);
25
+ //
26
+ // The yp_list_t struct is used to represent the overall linked list. It
27
+ // contains a pointer to the head and tail of the list. This allows for easy
28
+ // iteration and appending of new nodes.
29
+
30
+ #ifndef YARP_LIST_H
31
+ #define YARP_LIST_H
32
+
33
+ #include "yarp/defines.h"
34
+
35
+ #include <stdbool.h>
36
+ #include <stddef.h>
37
+ #include <stdint.h>
38
+ #include <stdlib.h>
39
+
40
+ // This represents a node in the linked list.
41
+ typedef struct yp_list_node {
42
+ struct yp_list_node *next; // Next node in the list. NOTE(review): presumably NULL for the last node - confirm in yp_list.c.
43
+ } yp_list_node_t;
44
+
45
+ // This represents the overall linked list. It keeps a pointer to the head and
46
+ // tail so that iteration is easy and pushing new nodes is easy.
47
+ typedef struct {
48
+ yp_list_node_t *head; // First node of the list; iteration starts here.
49
+ yp_list_node_t *tail; // Last node of the list, kept so that pushing new nodes is easy.
50
+ } yp_list_t;
51
+
52
+ // Initializes a new list.
53
+ YP_EXPORTED_FUNCTION void yp_list_init(yp_list_t *list);
54
+
55
+ // Returns true if the given list is empty.
56
+ YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
57
+
58
+ // Returns the size of the list in O(n) time.
59
+ YP_EXPORTED_FUNCTION uint32_t yp_list_size(yp_list_t *list);
60
+
61
+ // Append a node to the given list.
62
+ void yp_list_append(yp_list_t *list, yp_list_node_t *node);
63
+
64
+ // Deallocate the internal state of the given list.
65
+ YP_EXPORTED_FUNCTION void yp_list_free(yp_list_t *list);
66
+
67
+ #endif
@@ -0,0 +1,14 @@
1
+ #ifndef YP_MEMCHR_H
2
+ #define YP_MEMCHR_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/enc/yp_encoding.h"
6
+
7
+ #include <stddef.h>
8
+
9
+ // We need to roll our own memchr to handle cases where the encoding changes and
10
+ // we need to search for a character in a buffer that could be the trailing byte
11
+ // of a multibyte character.
12
+ void * yp_memchr(const void *source, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding);
13
+
14
+ #endif
@@ -0,0 +1,54 @@
1
+ // When compiling the syntax tree, it's necessary to know the line and column
2
+ // of many nodes. This is necessary to support things like error messages,
3
+ // tracepoints, etc.
4
+ //
5
+ // It's possible that we could store the start line, start column, end line, and
6
+ // end column on every node in addition to the offsets that we already store,
7
+ // but that would be quite a lot of memory overhead. Instead, we keep a list of newline offsets and look up the line and column when they are needed.
8
+
9
+ #ifndef YP_NEWLINE_LIST_H
10
+ #define YP_NEWLINE_LIST_H
11
+
12
+ #include "yarp/defines.h"
13
+
14
+ #include <assert.h>
15
+ #include <stdbool.h>
16
+ #include <stddef.h>
17
+ #include <stdlib.h>
18
+
19
+ // A list of offsets of newlines in a string. The offsets are assumed to be
20
+ // sorted/inserted in ascending order.
21
+ typedef struct {
22
+ const char *start; // NOTE(review): presumably the start of the string that cursors are measured against - confirm in yp_newline_list.c.
23
+
24
+ size_t *offsets; // Offsets of newlines, assumed to be in ascending order.
25
+ size_t size; // NOTE(review): presumably the number of offsets currently stored - confirm.
26
+ size_t capacity; // NOTE(review): presumably the number of offsets the allocation can hold - confirm.
27
+
28
+ size_t last_offset; // NOTE(review): looks like a cache of the most recent lookup - confirm in yp_newline_list.c.
29
+ size_t last_index; // NOTE(review): looks like the index paired with last_offset - confirm.
30
+ } yp_newline_list_t;
31
+
32
+ // A line and column in a string.
33
+ typedef struct {
34
+ size_t line; // NOTE(review): whether lines are 0- or 1-based is not visible in this header - confirm.
35
+ size_t column; // NOTE(review): unit (bytes vs. characters) and origin are not visible in this header - confirm.
36
+ } yp_line_column_t;
37
+
38
+ // Initialize a new newline list with the given capacity. Returns true if the
39
+ // allocation of the offsets succeeds, otherwise returns false.
40
+ bool yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity);
41
+
42
+ // Append a new offset to the newline list. Returns true if the reallocation of
43
+ // the offsets succeeds (if one was necessary), otherwise returns false.
44
+ bool yp_newline_list_append(yp_newline_list_t *list, const char *cursor);
45
+
46
+ // Returns the line and column of the given offset. If the offset is not in the
47
+ // list, the line and column of the closest offset less than the given offset
48
+ // are returned.
49
+ yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor);
50
+
51
+ // Free the internal memory allocated for the newline list.
52
+ void yp_newline_list_free(yp_newline_list_t *list);
53
+
54
+ #endif
@@ -0,0 +1,24 @@
1
+ #ifndef YP_STATE_STACK_H
2
+ #define YP_STATE_STACK_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <stdbool.h>
7
+ #include <stdint.h>
8
+
9
+ // An integer type (uint32_t) that represents a stack of bools.
10
+ typedef uint32_t yp_state_stack_t;
11
+
12
+ // Initializes the state stack to an empty stack.
13
+ void yp_state_stack_init(yp_state_stack_t *stack);
14
+
15
+ // Pushes a value onto the stack.
16
+ void yp_state_stack_push(yp_state_stack_t *stack, bool value);
17
+
18
+ // Pops a value off the stack.
19
+ void yp_state_stack_pop(yp_state_stack_t *stack);
20
+
21
+ // Returns the value at the top of the stack.
22
+ bool yp_state_stack_p(yp_state_stack_t *stack);
23
+
24
+ #endif
@@ -0,0 +1,57 @@
1
+ #ifndef YARP_STRING_H
2
+ #define YARP_STRING_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdbool.h>
8
+ #include <stddef.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+
12
+ // This struct represents a string value.
13
+ typedef struct {
14
+ enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type; // Kind of string; each kind has a matching yp_string_*_init function declared in this header.
15
+ char *source; // Start pointer of the string (see yp_string_source).
16
+ size_t length; // Length of the string (see yp_string_length).
17
+ } yp_string_t;
18
+
19
+ #define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
20
+
21
+ // Initialize a shared string that is based on initial input.
22
+ void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);
23
+
24
+ // Initialize an owned string that is responsible for freeing allocated memory.
25
+ void yp_string_owned_init(yp_string_t *string, char *source, size_t length);
26
+
27
+ // Initialize a constant string that doesn't own its memory source.
28
+ void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
29
+
30
+ // Read the file indicated by the filepath parameter into source and load its
31
+ // contents and size into the given yp_string_t.
32
+ // The given yp_string_t should be freed using yp_string_free() when it is no longer used.
33
+ //
34
+ // We want to use demand paging as much as possible in order to avoid having to
35
+ // read the entire file into memory (which could be detrimental to performance
36
+ // for large files). This means that if we're on Windows we'll use
37
+ // `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
38
+ // `mmap`, and on other POSIX systems we'll use `read`.
39
+ bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
40
+
41
+ // Returns the memory size associated with the string.
42
+ size_t yp_string_memsize(const yp_string_t *string);
43
+
44
+ // Ensure the string is owned. If it is not, then reinitialize it as owned and
45
+ // copy over the previous source.
46
+ void yp_string_ensure_owned(yp_string_t *string);
47
+
48
+ // Returns the length associated with the string.
49
+ YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
50
+
51
+ // Returns the start pointer associated with the string.
52
+ YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
53
+
54
+ // Free the associated memory of the given string.
55
+ YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
56
+
57
+ #endif // YARP_STRING_H
@@ -0,0 +1,28 @@
1
+ #ifndef YARP_STRING_LIST_H
2
+ #define YARP_STRING_LIST_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/util/yp_string.h"
6
+
7
+ #include <stddef.h>
8
+ #include <stdlib.h>
9
+
10
+ typedef struct {
11
+ yp_string_t *strings; // Storage for the strings in the list.
12
+ size_t length; // NOTE(review): presumably the number of strings currently in the list - confirm.
13
+ size_t capacity; // NOTE(review): presumably the number of strings the storage can hold - confirm.
14
+ } yp_string_list_t;
15
+
16
+ // Allocate a new yp_string_list_t.
17
+ yp_string_list_t * yp_string_list_alloc(void);
18
+
19
+ // Initialize a yp_string_list_t with its default values.
20
+ YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
21
+
22
+ // Append a yp_string_t to the given string list.
23
+ void yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string);
24
+
25
+ // Free the memory associated with the string list.
26
+ YP_EXPORTED_FUNCTION void yp_string_list_free(yp_string_list_t *string_list);
27
+
28
+ #endif
@@ -0,0 +1,29 @@
1
+ #ifndef YP_STRPBRK_H
2
+ #define YP_STRPBRK_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/parser.h"
6
+
7
+ #include <stddef.h>
8
+ #include <string.h>
9
+
10
+ // Here we have rolled our own version of strpbrk. The standard library strpbrk
11
+ // has undefined behavior when the source string is not null-terminated. We want
12
+ // to support strings that are not null-terminated because yp_parse does not
13
+ // have the contract that the string is null-terminated. (This is desirable
14
+ // because it means the extension can call yp_parse with the result of a call to
15
+ // mmap).
16
+ //
17
+ // The standard library strpbrk also does not support passing a maximum length
18
+ // to search. We want to support this for the reason mentioned above, but we
19
+ // also don't want it to stop on null bytes. Ruby actually allows null bytes
20
+ // within strings, comments, regular expressions, etc. So we need to be able to
21
+ // skip past them.
22
+ //
23
+ // Finally, we want to support encodings wherein the charset could contain
24
+ // characters that are trailing bytes of multi-byte characters. For example, in
25
+ // Shift-JIS, the backslash character can be a trailing byte. In that case we
26
+ // need to take a slower path and iterate one multi-byte character at a time.
27
+ const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
28
+
29
+ #endif
@@ -0,0 +1,5 @@
1
+ #define YP_VERSION_MAJOR 0
2
+ #define YP_VERSION_MINOR 6
3
+ #define YP_VERSION_PATCH 0
4
+
5
+ #define YP_VERSION "0.6.0"
data/include/yarp.h ADDED
@@ -0,0 +1,69 @@
1
+ #ifndef YARP_H
2
+ #define YARP_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/ast.h"
6
+ #include "yarp/diagnostic.h"
7
+ #include "yarp/node.h"
8
+ #include "yarp/pack.h"
9
+ #include "yarp/parser.h"
10
+ #include "yarp/regexp.h"
11
+ #include "yarp/unescape.h"
12
+ #include "yarp/util/yp_buffer.h"
13
+ #include "yarp/util/yp_char.h"
14
+ #include "yarp/util/yp_memchr.h"
15
+ #include "yarp/util/yp_strpbrk.h"
16
+
17
+ #include <assert.h>
18
+ #include <stdarg.h>
19
+ #include <stdbool.h>
20
+ #include <stdint.h>
21
+ #include <stdio.h>
22
+ #include <stdlib.h>
23
+ #include <string.h>
24
+
25
+ #ifndef _WIN32
26
+ #include <strings.h>
27
+ #endif
28
+
29
+ void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
30
+
31
+ void yp_print_node(yp_parser_t *parser, yp_node_t *node);
32
+
33
+ // The YARP version and the serialization format.
34
+ YP_EXPORTED_FUNCTION const char * yp_version(void);
35
+
36
+ // Initialize a parser with the given source, size, and filepath.
37
+ YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath);
38
+
39
+ // Register a callback that will be called whenever YARP changes the encoding it
40
+ // is using to parse based on the magic comment.
41
+ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_changed_callback(yp_parser_t *parser, yp_encoding_changed_callback_t callback);
42
+
43
+ // Register a callback that will be called when YARP encounters a magic comment
44
+ // with an encoding referenced that it doesn't understand. The callback should
45
+ // return NULL if it also doesn't understand the encoding or it should return a
46
+ // pointer to a yp_encoding_t struct that contains the functions necessary to
47
+ // parse identifiers.
48
+ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_t *parser, yp_encoding_decode_callback_t callback);
49
+
50
+ // Free any memory associated with the given parser.
51
+ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
52
+
53
+ // Parse the Ruby source associated with the given parser and return the tree.
54
+ YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
55
+
56
+ // Pretty-prints the AST represented by the given node to the given buffer.
57
+ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
58
+
59
+ // Serialize the AST represented by the given node to the given buffer.
60
+ YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
61
+
62
+ // Parse and serialize the AST represented by the given source to the given
63
+ // buffer.
64
+ YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
65
+
66
+ // Returns a string representation of the given token type.
67
+ YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
68
+
69
+ #endif