yarp 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
// This struct represents an abstract linked list that provides common
|
2
|
+
// functionality. It is meant to be used any time a linked list is necessary to
|
3
|
+
// store data.
|
4
|
+
//
|
5
|
+
// The linked list itself operates off a set of pointers. Because the nodes are
|
6
|
+
// not necessarily stored sequentially, they can be of any size. We use this fact to
|
7
|
+
// allow the consumer of this linked list to extend the node struct to include
|
8
|
+
// any data they want. This is done by using the yp_list_node_t as the first
|
9
|
+
// member of the struct.
|
10
|
+
//
|
11
|
+
// For example, if we want to store a list of integers, we can do the following:
|
12
|
+
//
|
13
|
+
// typedef struct {
|
14
|
+
// yp_list_node_t node;
|
15
|
+
// int value;
|
16
|
+
// } yp_int_node_t;
|
17
|
+
//
|
18
|
+
// yp_list_t list;
|
19
|
+
// yp_list_init(&list);
|
20
|
+
//
|
21
|
+
// yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
|
22
|
+
// node->value = 5;
|
23
|
+
//
|
24
|
+
// yp_list_append(&list, &node->node);
|
25
|
+
//
|
26
|
+
// The yp_list_t struct is used to represent the overall linked list. It
|
27
|
+
// contains a pointer to the head and tail of the list. This allows for easy
|
28
|
+
// iteration and appending of new nodes.
|
29
|
+
|
30
|
+
#ifndef YARP_LIST_H
|
31
|
+
#define YARP_LIST_H
|
32
|
+
|
33
|
+
#include "yarp/defines.h"
|
34
|
+
|
35
|
+
#include <stdbool.h>
|
36
|
+
#include <stddef.h>
|
37
|
+
#include <stdint.h>
|
38
|
+
#include <stdlib.h>
|
39
|
+
|
40
|
+
// This represents a node in the linked list. Consumers embed it as the first
// member of their own struct in order to attach data to each node.
|
41
|
+
typedef struct yp_list_node {
|
42
|
+
struct yp_list_node *next; // Link to the next node in the list.
|
43
|
+
} yp_list_node_t;
|
44
|
+
|
45
|
+
// This represents the overall linked list. It keeps a pointer to the head and
|
46
|
+
// tail so that iteration is easy and pushing new nodes is easy.
|
47
|
+
typedef struct {
|
48
|
+
yp_list_node_t *head; // First node in the list; iteration starts here.
|
49
|
+
yp_list_node_t *tail; // Last node in the list; kept so that appending is easy.
|
50
|
+
} yp_list_t;
|
51
|
+
|
52
|
+
// Initializes a new list.
|
53
|
+
YP_EXPORTED_FUNCTION void yp_list_init(yp_list_t *list);
|
54
|
+
|
55
|
+
// Returns true if the given list is empty.
|
56
|
+
YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
|
57
|
+
|
58
|
+
// Returns the size of the list in O(n) time.
|
59
|
+
YP_EXPORTED_FUNCTION uint32_t yp_list_size(yp_list_t *list);
|
60
|
+
|
61
|
+
// Append a node to the given list.
|
62
|
+
void yp_list_append(yp_list_t *list, yp_list_node_t *node);
|
63
|
+
|
64
|
+
// Deallocate the internal state of the given list.
|
65
|
+
YP_EXPORTED_FUNCTION void yp_list_free(yp_list_t *list);
|
66
|
+
|
67
|
+
#endif
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#ifndef YP_MEMCHR_H
|
2
|
+
#define YP_MEMCHR_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/enc/yp_encoding.h"
|
6
|
+
|
7
|
+
#include <stddef.h>
|
8
|
+
|
9
|
+
// We need to roll our own memchr to handle cases where the encoding changes and
|
10
|
+
// we need to search for a character in a buffer that could be the trailing byte
|
11
|
+
// of a multibyte character.
|
12
|
+
void * yp_memchr(const void *source, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding);
|
13
|
+
|
14
|
+
#endif
|
@@ -0,0 +1,54 @@
|
|
1
|
+
// When compiling the syntax tree, it's necessary to know the line and column
|
2
|
+
// of many nodes. This is necessary to support things like error messages,
|
3
|
+
// tracepoints, etc.
|
4
|
+
//
|
5
|
+
// It's possible that we could store the start line, start column, end line, and
|
6
|
+
// end column on every node in addition to the offsets that we already store,
|
7
|
+
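// but that would be quite a lot of memory overhead. Instead, this list records
// the offsets of newlines so that a line and column can be looked up from an offset.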
|
8
|
+
|
9
|
+
#ifndef YP_NEWLINE_LIST_H
|
10
|
+
#define YP_NEWLINE_LIST_H
|
11
|
+
|
12
|
+
#include "yarp/defines.h"
|
13
|
+
|
14
|
+
#include <assert.h>
|
15
|
+
#include <stdbool.h>
|
16
|
+
#include <stddef.h>
|
17
|
+
#include <stdlib.h>
|
18
|
+
|
19
|
+
// A list of offsets of newlines in a string. The offsets are assumed to be
|
20
|
+
// sorted/inserted in ascending order.
|
21
|
+
typedef struct {
|
22
|
+
const char *start; // NOTE(review): presumably the start of the string that offsets are measured from — confirm.
|
23
|
+
|
24
|
+
size_t *offsets; // The newline offsets, in ascending order.
|
25
|
+
size_t size; // NOTE(review): presumably the number of offsets currently stored — confirm.
|
26
|
+
size_t capacity; // NOTE(review): presumably the number of offsets the allocation can hold — confirm.
|
27
|
+
|
28
|
+
size_t last_offset; // NOTE(review): role not visible in this header; looks like a cached offset from a previous lookup — confirm in yp_newline_list.c.
|
29
|
+
size_t last_index; // NOTE(review): role not visible in this header; looks like the index paired with last_offset — confirm in yp_newline_list.c.
|
30
|
+
} yp_newline_list_t;
|
31
|
+
|
32
|
+
// A line and column in a string. This is the value returned by
// yp_newline_list_line_column.
|
33
|
+
typedef struct {
|
34
|
+
size_t line; // Line number (this header does not state whether it is 0- or 1-based).
|
35
|
+
size_t column; // Column within that line (base likewise not stated here).
|
36
|
+
} yp_line_column_t;
|
37
|
+
|
38
|
+
// Initialize a new newline list with the given capacity. Returns true if the
|
39
|
+
// allocation of the offsets succeeds, otherwise returns false.
|
40
|
+
bool yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity);
|
41
|
+
|
42
|
+
// Append a new offset to the newline list. Returns true if the reallocation of
|
43
|
+
// the offsets succeeds (if one was necessary), otherwise returns false.
|
44
|
+
bool yp_newline_list_append(yp_newline_list_t *list, const char *cursor);
|
45
|
+
|
46
|
+
// Returns the line and column of the given offset. If the offset is not in the
|
47
|
+
// list, the line and column of the closest offset less than the given offset
|
48
|
+
// are returned.
|
49
|
+
yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor);
|
50
|
+
|
51
|
+
// Free the internal memory allocated for the newline list.
|
52
|
+
void yp_newline_list_free(yp_newline_list_t *list);
|
53
|
+
|
54
|
+
#endif
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef YP_STATE_STACK_H
|
2
|
+
#define YP_STATE_STACK_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
|
6
|
+
#include <stdbool.h>
|
7
|
+
#include <stdint.h>
|
8
|
+
|
9
|
+
// A stack of bools, stored in a single uint32_t.
|
10
|
+
typedef uint32_t yp_state_stack_t;
|
11
|
+
|
12
|
+
// Initializes the state stack to an empty stack.
|
13
|
+
void yp_state_stack_init(yp_state_stack_t *stack);
|
14
|
+
|
15
|
+
// Pushes a value onto the stack.
|
16
|
+
void yp_state_stack_push(yp_state_stack_t *stack, bool value);
|
17
|
+
|
18
|
+
// Pops a value off the stack.
|
19
|
+
void yp_state_stack_pop(yp_state_stack_t *stack);
|
20
|
+
|
21
|
+
// Returns the value at the top of the stack.
|
22
|
+
bool yp_state_stack_p(yp_state_stack_t *stack);
|
23
|
+
|
24
|
+
#endif
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#ifndef YARP_STRING_H
|
2
|
+
#define YARP_STRING_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
|
6
|
+
#include <assert.h>
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stddef.h>
|
9
|
+
#include <stdlib.h>
|
10
|
+
#include <string.h>
|
11
|
+
|
12
|
+
// This struct represents a string value: a pointer to its source, its length,
// and a tag describing how the source memory is held.
|
13
|
+
typedef struct {
|
14
|
+
// The kind of string. NOTE(review): each value presumably corresponds to the
// yp_string_*_init function of the same name declared in this header
// (shared, owned, constant, mapped) — confirm in yp_string.c.
enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type;
|
15
|
+
char *source; // Start pointer of the string's contents.
|
16
|
+
size_t length; // Length of the string.
|
17
|
+
} yp_string_t;
|
18
|
+
|
19
|
+
// A yp_string_t value of type YP_STRING_CONSTANT with a NULL source and a
// length of 0.
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
|
20
|
+
|
21
|
+
// Initialize a shared string that is based on initial input.
|
22
|
+
void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);
|
23
|
+
|
24
|
+
// Initialize an owned string that is responsible for freeing allocated memory.
|
25
|
+
void yp_string_owned_init(yp_string_t *string, char *source, size_t length);
|
26
|
+
|
27
|
+
// Initialize a constant string that doesn't own its memory source.
|
28
|
+
void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
|
29
|
+
|
30
|
+
// Read the file indicated by the filepath parameter into source and load its
|
31
|
+
// contents and size into the given yp_string_t.
|
32
|
+
// The given yp_string_t should be freed using yp_string_free() when it is no longer used.
|
33
|
+
//
|
34
|
+
// We want to use demand paging as much as possible in order to avoid having to
|
35
|
+
// read the entire file into memory (which could be detrimental to performance
|
36
|
+
// for large files). This means that if we're on Windows we'll use
|
37
|
+
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
38
|
+
// `mmap`, and on other POSIX systems we'll use `read`.
|
39
|
+
bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
|
40
|
+
|
41
|
+
// Returns the memory size associated with the string.
|
42
|
+
size_t yp_string_memsize(const yp_string_t *string);
|
43
|
+
|
44
|
+
// Ensure the string is owned. If it is not, then reinitialize it as owned and
|
45
|
+
// copy over the previous source.
|
46
|
+
void yp_string_ensure_owned(yp_string_t *string);
|
47
|
+
|
48
|
+
// Returns the length associated with the string.
|
49
|
+
YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
|
50
|
+
|
51
|
+
// Returns the start pointer associated with the string.
|
52
|
+
YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
|
53
|
+
|
54
|
+
// Free the associated memory of the given string.
|
55
|
+
YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
|
56
|
+
|
57
|
+
#endif // YARP_STRING_H
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#ifndef YARP_STRING_LIST_H
|
2
|
+
#define YARP_STRING_LIST_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/util/yp_string.h"
|
6
|
+
|
7
|
+
#include <stddef.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
|
10
|
+
// A list of yp_string_t values.
typedef struct {
|
11
|
+
yp_string_t *strings; // The strings held by the list.
|
12
|
+
size_t length; // NOTE(review): presumably the number of strings currently stored — confirm.
|
13
|
+
size_t capacity; // NOTE(review): presumably the number of strings the allocation can hold — confirm.
|
14
|
+
} yp_string_list_t;
|
15
|
+
|
16
|
+
// Allocate a new yp_string_list_t.
|
17
|
+
yp_string_list_t * yp_string_list_alloc(void);
|
18
|
+
|
19
|
+
// Initialize a yp_string_list_t with its default values.
|
20
|
+
YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
|
21
|
+
|
22
|
+
// Append a yp_string_t to the given string list.
|
23
|
+
void yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string);
|
24
|
+
|
25
|
+
// Free the memory associated with the string list.
|
26
|
+
YP_EXPORTED_FUNCTION void yp_string_list_free(yp_string_list_t *string_list);
|
27
|
+
|
28
|
+
#endif
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef YP_STRPBRK_H
|
2
|
+
#define YP_STRPBRK_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/parser.h"
|
6
|
+
|
7
|
+
#include <stddef.h>
|
8
|
+
#include <string.h>
|
9
|
+
|
10
|
+
// Here we have rolled our own version of strpbrk. The standard library strpbrk
|
11
|
+
// has undefined behavior when the source string is not null-terminated. We want
|
12
|
+
// to support strings that are not null-terminated because yp_parse does not
|
13
|
+
// have the contract that the string is null-terminated. (This is desirable
|
14
|
+
// because it means the extension can call yp_parse with the result of a call to
|
15
|
+
// mmap).
|
16
|
+
//
|
17
|
+
// The standard library strpbrk also does not support passing a maximum length
|
18
|
+
// to search. We want to support this for the reason mentioned above, but we
|
19
|
+
// also don't want it to stop on null bytes. Ruby actually allows null bytes
|
20
|
+
// within strings, comments, regular expressions, etc. So we need to be able to
|
21
|
+
// skip past them.
|
22
|
+
//
|
23
|
+
// Finally, we want to support encodings wherein the charset could contain
|
24
|
+
// characters that are trailing bytes of multi-byte characters. For example, in
|
25
|
+
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
26
|
+
// need to take a slower path and iterate one multi-byte character at a time.
|
27
|
+
const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
|
28
|
+
|
29
|
+
#endif
|
data/include/yarp.h
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
#ifndef YARP_H
|
2
|
+
#define YARP_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/ast.h"
|
6
|
+
#include "yarp/diagnostic.h"
|
7
|
+
#include "yarp/node.h"
|
8
|
+
#include "yarp/pack.h"
|
9
|
+
#include "yarp/parser.h"
|
10
|
+
#include "yarp/regexp.h"
|
11
|
+
#include "yarp/unescape.h"
|
12
|
+
#include "yarp/util/yp_buffer.h"
|
13
|
+
#include "yarp/util/yp_char.h"
|
14
|
+
#include "yarp/util/yp_memchr.h"
|
15
|
+
#include "yarp/util/yp_strpbrk.h"
|
16
|
+
|
17
|
+
#include <assert.h>
|
18
|
+
#include <stdarg.h>
|
19
|
+
#include <stdbool.h>
|
20
|
+
#include <stdint.h>
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
|
25
|
+
#ifndef _WIN32
|
26
|
+
#include <strings.h>
|
27
|
+
#endif
|
28
|
+
|
29
|
+
void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
30
|
+
|
31
|
+
void yp_print_node(yp_parser_t *parser, yp_node_t *node);
|
32
|
+
|
33
|
+
// The YARP version and the serialization format.
|
34
|
+
YP_EXPORTED_FUNCTION const char * yp_version(void);
|
35
|
+
|
36
|
+
// Initialize a parser with the given source pointer, source size, and filepath.
|
37
|
+
YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath);
|
38
|
+
|
39
|
+
// Register a callback that will be called whenever YARP changes the encoding it
|
40
|
+
// is using to parse based on the magic comment.
|
41
|
+
YP_EXPORTED_FUNCTION void yp_parser_register_encoding_changed_callback(yp_parser_t *parser, yp_encoding_changed_callback_t callback);
|
42
|
+
|
43
|
+
// Register a callback that will be called when YARP encounters a magic comment
|
44
|
+
// with an encoding referenced that it doesn't understand. The callback should
|
45
|
+
// return NULL if it also doesn't understand the encoding or it should return a
|
46
|
+
// pointer to a yp_encoding_t struct that contains the functions necessary to
|
47
|
+
// parse identifiers.
|
48
|
+
YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_t *parser, yp_encoding_decode_callback_t callback);
|
49
|
+
|
50
|
+
// Free any memory associated with the given parser.
|
51
|
+
YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
|
52
|
+
|
53
|
+
// Parse the Ruby source associated with the given parser and return the tree.
|
54
|
+
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
|
55
|
+
|
56
|
+
// Pretty-prints the AST represented by the given node to the given buffer.
|
57
|
+
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
58
|
+
|
59
|
+
// Serialize the AST represented by the given node to the given buffer.
|
60
|
+
YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
61
|
+
|
62
|
+
// Parse and serialize the AST represented by the given source to the given
|
63
|
+
// buffer.
|
64
|
+
YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
|
65
|
+
|
66
|
+
// Returns a string representation of the given token type.
|
67
|
+
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
|
68
|
+
|
69
|
+
#endif
|