yarp 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -8
- data/CONTRIBUTING.md +2 -2
- data/Makefile +5 -5
- data/README.md +11 -12
- data/config.yml +6 -2
- data/docs/build_system.md +21 -21
- data/docs/building.md +4 -4
- data/docs/configuration.md +25 -21
- data/docs/design.md +2 -2
- data/docs/encoding.md +17 -17
- data/docs/fuzzing.md +4 -4
- data/docs/heredocs.md +3 -3
- data/docs/mapping.md +94 -94
- data/docs/ripper.md +4 -4
- data/docs/ruby_api.md +11 -11
- data/docs/serialization.md +17 -16
- data/docs/testing.md +6 -6
- data/ext/prism/api_node.c +4725 -0
- data/ext/{yarp → prism}/api_pack.c +82 -82
- data/ext/{yarp → prism}/extconf.rb +13 -13
- data/ext/{yarp → prism}/extension.c +175 -168
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/{yarp → prism}/parser.h +143 -142
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
- data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
- data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/{yarp → prism}/ffi.rb +66 -67
- data/lib/{yarp → prism}/lex_compat.rb +40 -43
- data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
- data/lib/{yarp → prism}/node.rb +2012 -2593
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/{yarp → prism}/pack.rb +1 -1
- data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
- data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +266 -0
- data/lib/{yarp → prism}/pattern.rb +14 -14
- data/lib/{yarp → prism}/ripper_compat.rb +5 -5
- data/lib/{yarp → prism}/serialize.rb +12 -7
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/lib/yarp.rb +2 -614
- data/src/diagnostic.c +213 -208
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
- data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +1293 -1233
- data/src/pack.c +247 -247
- data/src/prettyprint.c +1479 -1479
- data/src/{yarp.c → prism.c} +5205 -5083
- data/src/regexp.c +132 -132
- data/src/serialize.c +1121 -1121
- data/src/token_type.c +169 -167
- data/src/unescape.c +106 -87
- data/src/util/pm_buffer.c +103 -0
- data/src/util/{yp_char.c → pm_char.c} +72 -72
- data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
- data/src/util/{yp_list.c → pm_list.c} +10 -10
- data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
- data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
- data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
- data/src/util/{yp_string.c → pm_string.c} +38 -38
- data/src/util/pm_string_list.c +29 -0
- data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
- data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
- data/yarp.gemspec +68 -59
- metadata +70 -61
- data/ext/yarp/api_node.c +0 -4728
- data/ext/yarp/extension.h +0 -18
- data/include/yarp/ast.h +0 -1929
- data/include/yarp/defines.h +0 -45
- data/include/yarp/diagnostic.h +0 -226
- data/include/yarp/node.h +0 -42
- data/include/yarp/pack.h +0 -141
- data/include/yarp/regexp.h +0 -19
- data/include/yarp/unescape.h +0 -44
- data/include/yarp/util/yp_buffer.h +0 -51
- data/include/yarp/util/yp_memchr.h +0 -14
- data/include/yarp/util/yp_state_stack.h +0 -24
- data/include/yarp/util/yp_string_list.h +0 -25
- data/include/yarp/version.h +0 -4
- data/include/yarp.h +0 -82
- data/src/enc/yp_big5.c +0 -52
- data/src/enc/yp_euc_jp.c +0 -58
- data/src/enc/yp_shift_jis.c +0 -56
- data/src/enc/yp_windows_31j.c +0 -56
- data/src/util/yp_buffer.c +0 -101
- data/src/util/yp_string_list.c +0 -29
@@ -1,34 +1,34 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef PRISM_CHAR_H
|
2
|
+
#define PRISM_CHAR_H
|
3
3
|
|
4
|
-
#include "
|
5
|
-
#include "
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/util/pm_newline_list.h"
|
6
6
|
|
7
7
|
#include <stdbool.h>
|
8
8
|
#include <stddef.h>
|
9
9
|
|
10
10
|
// Returns the number of characters at the start of the string that are
|
11
11
|
// whitespace. Disallows searching past the given maximum number of characters.
|
12
|
-
size_t
|
12
|
+
size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
|
13
13
|
|
14
14
|
// Returns the number of characters at the start of the string that are
|
15
15
|
// whitespace while also tracking the location of each newline. Disallows
|
16
16
|
// searching past the given maximum number of characters.
|
17
17
|
size_t
|
18
|
-
|
18
|
+
pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
|
19
19
|
|
20
20
|
// Returns the number of characters at the start of the string that are inline
|
21
21
|
// whitespace. Disallows searching past the given maximum number of characters.
|
22
|
-
size_t
|
22
|
+
size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
|
23
23
|
|
24
24
|
// Returns the number of characters at the start of the string that are decimal
|
25
25
|
// digits. Disallows searching past the given maximum number of characters.
|
26
|
-
size_t
|
26
|
+
size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
|
27
27
|
|
28
28
|
// Returns the number of characters at the start of the string that are
|
29
29
|
// hexadecimal digits. Disallows searching past the given maximum number of
|
30
30
|
// characters.
|
31
|
-
size_t
|
31
|
+
size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
|
32
32
|
|
33
33
|
// Returns the number of characters at the start of the string that are octal
|
34
34
|
// digits or underscores. Disallows searching past the given maximum number of
|
@@ -37,7 +37,7 @@ size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
|
|
37
37
|
// If multiple underscores are found in a row or if an underscore is
|
38
38
|
// found at the end of the number, then the invalid pointer is set to the index
|
39
39
|
// of the first invalid underscore.
|
40
|
-
size_t
|
40
|
+
size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
41
41
|
|
42
42
|
// Returns the number of characters at the start of the string that are decimal
|
43
43
|
// digits or underscores. Disallows searching past the given maximum number of
|
@@ -46,7 +46,7 @@ size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uin
|
|
46
46
|
// If multiple underscores are found in a row or if an underscore is
|
47
47
|
// found at the end of the number, then the invalid pointer is set to the index
|
48
48
|
// of the first invalid underscore.
|
49
|
-
size_t
|
49
|
+
size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
50
50
|
|
51
51
|
// Returns the number of characters at the start of the string that are
|
52
52
|
// hexadecimal digits or underscores. Disallows searching past the given maximum
|
@@ -55,11 +55,11 @@ size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const u
|
|
55
55
|
// If multiple underscores are found in a row or if an underscore is
|
56
56
|
// found at the end of the number, then the invalid pointer is set to the index
|
57
57
|
// of the first invalid underscore.
|
58
|
-
size_t
|
58
|
+
size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
59
59
|
|
60
60
|
// Returns the number of characters at the start of the string that are regexp
|
61
61
|
// options. Disallows searching past the given maximum number of characters.
|
62
|
-
size_t
|
62
|
+
size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
|
63
63
|
|
64
64
|
// Returns the number of characters at the start of the string that are binary
|
65
65
|
// digits or underscores. Disallows searching past the given maximum number of
|
@@ -68,24 +68,24 @@ size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
|
|
68
68
|
// If multiple underscores are found in a row or if an underscore is
|
69
69
|
// found at the end of the number, then the invalid pointer is set to the index
|
70
70
|
// of the first invalid underscore.
|
71
|
-
size_t
|
71
|
+
size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
72
72
|
|
73
73
|
// Returns true if the given character is a whitespace character.
|
74
|
-
bool
|
74
|
+
bool pm_char_is_whitespace(const uint8_t b);
|
75
75
|
|
76
76
|
// Returns true if the given character is an inline whitespace character.
|
77
|
-
bool
|
77
|
+
bool pm_char_is_inline_whitespace(const uint8_t b);
|
78
78
|
|
79
79
|
// Returns true if the given character is a binary digit.
|
80
|
-
bool
|
80
|
+
bool pm_char_is_binary_digit(const uint8_t b);
|
81
81
|
|
82
82
|
// Returns true if the given character is an octal digit.
|
83
|
-
bool
|
83
|
+
bool pm_char_is_octal_digit(const uint8_t b);
|
84
84
|
|
85
85
|
// Returns true if the given character is a decimal digit.
|
86
|
-
bool
|
86
|
+
bool pm_char_is_decimal_digit(const uint8_t b);
|
87
87
|
|
88
88
|
// Returns true if the given character is a hexadecimal digit.
|
89
|
-
bool
|
89
|
+
bool pm_char_is_hexadecimal_digit(const uint8_t b);
|
90
90
|
|
91
91
|
#endif
|
@@ -3,10 +3,10 @@
|
|
3
3
|
// equality. This comparison ends up being much faster than strcmp, since it
|
4
4
|
// only requires a single integer comparison.
|
5
5
|
|
6
|
-
#ifndef
|
7
|
-
#define
|
6
|
+
#ifndef PRISM_CONSTANT_POOL_H
|
7
|
+
#define PRISM_CONSTANT_POOL_H
|
8
8
|
|
9
|
-
#include "
|
9
|
+
#include "prism/defines.h"
|
10
10
|
|
11
11
|
#include <assert.h>
|
12
12
|
#include <stdbool.h>
|
@@ -14,61 +14,65 @@
|
|
14
14
|
#include <stdlib.h>
|
15
15
|
#include <string.h>
|
16
16
|
|
17
|
-
typedef uint32_t
|
17
|
+
typedef uint32_t pm_constant_id_t;
|
18
18
|
|
19
19
|
typedef struct {
|
20
|
-
|
20
|
+
pm_constant_id_t *ids;
|
21
21
|
size_t size;
|
22
22
|
size_t capacity;
|
23
|
-
}
|
23
|
+
} pm_constant_id_list_t;
|
24
24
|
|
25
25
|
// Initialize a list of constant ids.
|
26
|
-
void
|
26
|
+
void pm_constant_id_list_init(pm_constant_id_list_t *list);
|
27
27
|
|
28
28
|
// Append a constant id to a list of constant ids. Returns false if any
|
29
29
|
// potential reallocations fail.
|
30
|
-
bool
|
30
|
+
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
|
31
31
|
|
32
32
|
// Checks if the current constant id list includes the given constant id.
|
33
33
|
bool
|
34
|
-
|
34
|
+
pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
|
35
35
|
|
36
36
|
// Get the memory size of a list of constant ids.
|
37
|
-
size_t
|
37
|
+
size_t pm_constant_id_list_memsize(pm_constant_id_list_t *list);
|
38
38
|
|
39
39
|
// Free the memory associated with a list of constant ids.
|
40
|
-
void
|
40
|
+
void pm_constant_id_list_free(pm_constant_id_list_t *list);
|
41
41
|
|
42
42
|
typedef struct {
|
43
43
|
unsigned int id: 31;
|
44
44
|
bool owned: 1;
|
45
|
+
uint32_t hash;
|
46
|
+
} pm_constant_pool_bucket_t;
|
47
|
+
|
48
|
+
typedef struct {
|
45
49
|
const uint8_t *start;
|
46
50
|
size_t length;
|
47
|
-
|
48
|
-
} yp_constant_t;
|
51
|
+
} pm_constant_t;
|
49
52
|
|
50
53
|
typedef struct {
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
54
|
+
pm_constant_pool_bucket_t *buckets;
|
55
|
+
pm_constant_t *constants;
|
56
|
+
uint32_t size;
|
57
|
+
uint32_t capacity;
|
58
|
+
} pm_constant_pool_t;
|
55
59
|
|
56
60
|
// Define an empty constant pool.
|
57
|
-
#define
|
61
|
+
#define PM_CONSTANT_POOL_EMPTY ((pm_constant_pool_t) { .buckets = NULL, .constants = NULL, .size = 0, .capacity = 0 })
|
58
62
|
|
59
63
|
// Initialize a new constant pool with a given capacity.
|
60
|
-
bool
|
64
|
+
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
|
61
65
|
|
62
66
|
// Insert a constant into a constant pool that is a slice of a source string.
|
63
67
|
// Returns the id of the constant, or 0 if any potential calls to resize fail.
|
64
|
-
|
68
|
+
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
65
69
|
|
66
70
|
// Insert a constant into a constant pool from memory that is now owned by the
|
67
71
|
// constant pool. Returns the id of the constant, or 0 if any potential calls to
|
68
72
|
// resize fail.
|
69
|
-
|
73
|
+
pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
70
74
|
|
71
75
|
// Free the memory associated with a constant pool.
|
72
|
-
void
|
76
|
+
void pm_constant_pool_free(pm_constant_pool_t *pool);
|
73
77
|
|
74
78
|
#endif
|
@@ -5,30 +5,30 @@
|
|
5
5
|
// The linked list itself operates off a set of pointers. Because the pointers
|
6
6
|
// are not necessarily sequential, they can be of any size. We use this fact to
|
7
7
|
// allow the consumer of this linked list to extend the node struct to include
|
8
|
-
// any data they want. This is done by using the
|
8
|
+
// any data they want. This is done by using the pm_list_node_t as the first
|
9
9
|
// member of the struct.
|
10
10
|
//
|
11
11
|
// For example, if we want to store a list of integers, we can do the following:
|
12
12
|
//
|
13
13
|
// typedef struct {
|
14
|
-
//
|
14
|
+
// pm_list_node_t node;
|
15
15
|
// int value;
|
16
|
-
// }
|
16
|
+
// } pm_int_node_t;
|
17
17
|
//
|
18
|
-
//
|
19
|
-
//
|
18
|
+
// pm_list_t list = PM_LIST_EMPTY;
|
19
|
+
// pm_int_node_t *node = malloc(sizeof(pm_int_node_t));
|
20
20
|
// node->value = 5;
|
21
21
|
//
|
22
|
-
//
|
22
|
+
// pm_list_append(&list, &node->node);
|
23
23
|
//
|
24
|
-
// The
|
24
|
+
// The pm_list_t struct is used to represent the overall linked list. It
|
25
25
|
// contains a pointer to the head and tail of the list. This allows for easy
|
26
26
|
// iteration and appending of new nodes.
|
27
27
|
|
28
|
-
#ifndef
|
29
|
-
#define
|
28
|
+
#ifndef PRISM_LIST_H
|
29
|
+
#define PRISM_LIST_H
|
30
30
|
|
31
|
-
#include "
|
31
|
+
#include "prism/defines.h"
|
32
32
|
|
33
33
|
#include <stdbool.h>
|
34
34
|
#include <stddef.h>
|
@@ -36,32 +36,32 @@
|
|
36
36
|
#include <stdlib.h>
|
37
37
|
|
38
38
|
// This represents a node in the linked list.
|
39
|
-
typedef struct
|
40
|
-
struct
|
41
|
-
}
|
39
|
+
typedef struct pm_list_node {
|
40
|
+
struct pm_list_node *next;
|
41
|
+
} pm_list_node_t;
|
42
42
|
|
43
43
|
// This represents the overall linked list. It keeps a pointer to the head and
|
44
44
|
// tail so that iteration is easy and pushing new nodes is easy.
|
45
45
|
typedef struct {
|
46
46
|
size_t size;
|
47
|
-
|
48
|
-
|
49
|
-
}
|
47
|
+
pm_list_node_t *head;
|
48
|
+
pm_list_node_t *tail;
|
49
|
+
} pm_list_t;
|
50
50
|
|
51
51
|
// This represents an empty list. It's used to initialize a stack-allocated list
|
52
52
|
// as opposed to a method call.
|
53
|
-
#define
|
53
|
+
#define PM_LIST_EMPTY ((pm_list_t) { .size = 0, .head = NULL, .tail = NULL })
|
54
54
|
|
55
55
|
// Returns true if the given list is empty.
|
56
|
-
|
56
|
+
PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
|
57
57
|
|
58
58
|
// Returns the size of the list.
|
59
|
-
|
59
|
+
PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
|
60
60
|
|
61
61
|
// Append a node to the given list.
|
62
|
-
void
|
62
|
+
void pm_list_append(pm_list_t *list, pm_list_node_t *node);
|
63
63
|
|
64
64
|
// Deallocate the internal state of the given list.
|
65
|
-
|
65
|
+
PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
|
66
66
|
|
67
67
|
#endif
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#ifndef PRISM_MEMCHR_H
|
2
|
+
#define PRISM_MEMCHR_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/enc/pm_encoding.h"
|
6
|
+
|
7
|
+
#include <stddef.h>
|
8
|
+
|
9
|
+
// We need to roll our own memchr to handle cases where the encoding changes and
|
10
|
+
// we need to search for a character in a buffer that could be the trailing byte
|
11
|
+
// of a multibyte character.
|
12
|
+
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding);
|
13
|
+
|
14
|
+
#endif
|
@@ -6,10 +6,10 @@
|
|
6
6
|
// end column on every node in addition to the offsets that we already store,
|
7
7
|
// but that would be quite a lot of memory overhead.
|
8
8
|
|
9
|
-
#ifndef
|
10
|
-
#define
|
9
|
+
#ifndef PRISM_NEWLINE_LIST_H
|
10
|
+
#define PRISM_NEWLINE_LIST_H
|
11
11
|
|
12
|
-
#include "
|
12
|
+
#include "prism/defines.h"
|
13
13
|
|
14
14
|
#include <assert.h>
|
15
15
|
#include <stdbool.h>
|
@@ -27,35 +27,35 @@ typedef struct {
|
|
27
27
|
|
28
28
|
size_t last_offset;
|
29
29
|
size_t last_index;
|
30
|
-
}
|
30
|
+
} pm_newline_list_t;
|
31
31
|
|
32
32
|
// A line and column in a string.
|
33
33
|
typedef struct {
|
34
34
|
size_t line;
|
35
35
|
size_t column;
|
36
|
-
}
|
36
|
+
} pm_line_column_t;
|
37
37
|
|
38
|
-
#define
|
38
|
+
#define PM_NEWLINE_LIST_EMPTY ((pm_newline_list_t) { \
|
39
39
|
.start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
|
40
40
|
})
|
41
41
|
|
42
42
|
// Initialize a new newline list with the given capacity. Returns true if the
|
43
43
|
// allocation of the offsets succeeds, otherwise returns false.
|
44
|
-
bool
|
44
|
+
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
|
45
45
|
|
46
46
|
// Append a new offset to the newline list. Returns true if the reallocation of
|
47
47
|
// the offsets succeeds (if one was necessary), otherwise returns false.
|
48
|
-
bool
|
48
|
+
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
|
49
49
|
|
50
50
|
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
|
51
|
-
bool
|
51
|
+
bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor);
|
52
52
|
|
53
53
|
// Returns the line and column of the given offset. If the offset is not in the
|
54
54
|
// list, the line and column of the closest offset less than the given offset
|
55
55
|
// are returned.
|
56
|
-
|
56
|
+
pm_line_column_t pm_newline_list_line_column(pm_newline_list_t *list, const uint8_t *cursor);
|
57
57
|
|
58
58
|
// Free the internal memory allocated for the newline list.
|
59
|
-
void
|
59
|
+
void pm_newline_list_free(pm_newline_list_t *list);
|
60
60
|
|
61
61
|
#endif
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef PRISM_STATE_STACK_H
|
2
|
+
#define PRISM_STATE_STACK_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
|
6
|
+
#include <stdbool.h>
|
7
|
+
#include <stdint.h>
|
8
|
+
|
9
|
+
// A struct that represents a stack of bools.
|
10
|
+
typedef uint32_t pm_state_stack_t;
|
11
|
+
|
12
|
+
// Initializes the state stack to an empty stack.
|
13
|
+
#define PM_STATE_STACK_EMPTY ((pm_state_stack_t) 0)
|
14
|
+
|
15
|
+
// Pushes a value onto the stack.
|
16
|
+
void pm_state_stack_push(pm_state_stack_t *stack, bool value);
|
17
|
+
|
18
|
+
// Pops a value off the stack.
|
19
|
+
void pm_state_stack_pop(pm_state_stack_t *stack);
|
20
|
+
|
21
|
+
// Returns the value at the top of the stack.
|
22
|
+
bool pm_state_stack_p(pm_state_stack_t *stack);
|
23
|
+
|
24
|
+
#endif
|
@@ -1,7 +1,7 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef PRISM_STRING_H
|
2
|
+
#define PRISM_STRING_H
|
3
3
|
|
4
|
-
#include "
|
4
|
+
#include "prism/defines.h"
|
5
5
|
|
6
6
|
#include <assert.h>
|
7
7
|
#include <stdbool.h>
|
@@ -11,51 +11,51 @@
|
|
11
11
|
|
12
12
|
// This struct represents a string value.
|
13
13
|
typedef struct {
|
14
|
-
enum {
|
14
|
+
enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
|
15
15
|
const uint8_t *source;
|
16
16
|
size_t length;
|
17
|
-
}
|
17
|
+
} pm_string_t;
|
18
18
|
|
19
|
-
#define
|
19
|
+
#define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
|
20
20
|
|
21
21
|
// Initialize a shared string that is based on initial input.
|
22
|
-
void
|
22
|
+
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
|
23
23
|
|
24
24
|
// Initialize an owned string that is responsible for freeing allocated memory.
|
25
|
-
void
|
25
|
+
void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
|
26
26
|
|
27
27
|
// Initialize a constant string that doesn't own its memory source.
|
28
|
-
void
|
28
|
+
void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
|
29
29
|
|
30
30
|
// Read the file indicated by the filepath parameter into source and load its
|
31
|
-
// contents and size into the given
|
32
|
-
// The given
|
31
|
+
// contents and size into the given pm_string_t.
|
32
|
+
// The given pm_string_t should be freed using pm_string_free() when it is no longer used.
|
33
33
|
//
|
34
34
|
// We want to use demand paging as much as possible in order to avoid having to
|
35
35
|
// read the entire file into memory (which could be detrimental to performance
|
36
36
|
// for large files). This means that if we're on windows we'll use
|
37
37
|
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
38
38
|
// `mmap`, and on other POSIX systems we'll use `read`.
|
39
|
-
|
39
|
+
PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
|
40
40
|
|
41
41
|
// Returns the memory size associated with the string.
|
42
|
-
size_t
|
42
|
+
size_t pm_string_memsize(const pm_string_t *string);
|
43
43
|
|
44
44
|
// Ensure the string is owned. If it is not, then reinitialize it as owned and
|
45
45
|
// copy over the previous source.
|
46
|
-
void
|
46
|
+
void pm_string_ensure_owned(pm_string_t *string);
|
47
47
|
|
48
48
|
// Returns the length associated with the string.
|
49
|
-
|
49
|
+
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
|
50
50
|
|
51
51
|
// Returns the start pointer associated with the string.
|
52
|
-
|
52
|
+
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
|
53
53
|
|
54
54
|
// Free the associated memory of the given string.
|
55
|
-
|
55
|
+
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
|
56
56
|
|
57
|
-
// Returns the size of the
|
57
|
+
// Returns the size of the pm_string_t struct. This is necessary to allocate the
|
58
58
|
// correct amount of memory in the FFI backend.
|
59
|
-
|
59
|
+
PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
|
60
60
|
|
61
|
-
#endif //
|
61
|
+
#endif // PRISM_STRING_H
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#ifndef PRISM_STRING_LIST_H
|
2
|
+
#define PRISM_STRING_LIST_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/util/pm_string.h"
|
6
|
+
|
7
|
+
#include <stddef.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
|
10
|
+
typedef struct {
|
11
|
+
pm_string_t *strings;
|
12
|
+
size_t length;
|
13
|
+
size_t capacity;
|
14
|
+
} pm_string_list_t;
|
15
|
+
|
16
|
+
// Initialize a pm_string_list_t with its default values.
|
17
|
+
PRISM_EXPORTED_FUNCTION void pm_string_list_init(pm_string_list_t *string_list);
|
18
|
+
|
19
|
+
// Append a pm_string_t to the given string list.
|
20
|
+
void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
|
21
|
+
|
22
|
+
// Free the memory associated with the string list.
|
23
|
+
PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
|
24
|
+
|
25
|
+
#endif
|
@@ -1,17 +1,17 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef PRISM_STRPBRK_H
|
2
|
+
#define PRISM_STRPBRK_H
|
3
3
|
|
4
|
-
#include "
|
5
|
-
#include "
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/parser.h"
|
6
6
|
|
7
7
|
#include <stddef.h>
|
8
8
|
#include <string.h>
|
9
9
|
|
10
10
|
// Here we have rolled our own version of strpbrk. The standard library strpbrk
|
11
11
|
// has undefined behavior when the source string is not null-terminated. We want
|
12
|
-
// to support strings that are not null-terminated because
|
12
|
+
// to support strings that are not null-terminated because pm_parse does not
|
13
13
|
// have the contract that the string is null-terminated. (This is desirable
|
14
|
-
// because it means the extension can call
|
14
|
+
// because it means the extension can call pm_parse with the result of a call to
|
15
15
|
// mmap).
|
16
16
|
//
|
17
17
|
// The standard library strpbrk also does not support passing a maximum length
|
@@ -24,6 +24,6 @@
|
|
24
24
|
// characters that are trailing bytes of multi-byte characters. For example, in
|
25
25
|
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
26
26
|
// need to take a slower path and iterate one multi-byte character at a time.
|
27
|
-
const uint8_t *
|
27
|
+
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
28
28
|
|
29
29
|
#endif
|
data/include/prism.h
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#ifndef PRISM_H
|
2
|
+
#define PRISM_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/ast.h"
|
6
|
+
#include "prism/diagnostic.h"
|
7
|
+
#include "prism/node.h"
|
8
|
+
#include "prism/pack.h"
|
9
|
+
#include "prism/parser.h"
|
10
|
+
#include "prism/regexp.h"
|
11
|
+
#include "prism/unescape.h"
|
12
|
+
#include "prism/util/pm_buffer.h"
|
13
|
+
#include "prism/util/pm_char.h"
|
14
|
+
#include "prism/util/pm_memchr.h"
|
15
|
+
#include "prism/util/pm_strpbrk.h"
|
16
|
+
#include "prism/version.h"
|
17
|
+
|
18
|
+
#include <assert.h>
|
19
|
+
#include <errno.h>
|
20
|
+
#include <stdarg.h>
|
21
|
+
#include <stdbool.h>
|
22
|
+
#include <stdint.h>
|
23
|
+
#include <stdio.h>
|
24
|
+
#include <stdlib.h>
|
25
|
+
#include <string.h>
|
26
|
+
|
27
|
+
#ifndef _WIN32
|
28
|
+
#include <strings.h>
|
29
|
+
#endif
|
30
|
+
|
31
|
+
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
|
32
|
+
|
33
|
+
void pm_print_node(pm_parser_t *parser, pm_node_t *node);
|
34
|
+
|
35
|
+
void pm_parser_metadata(pm_parser_t *parser, const char *metadata);
|
36
|
+
|
37
|
+
// Generate a scope node from the given node.
|
38
|
+
void pm_scope_node_init(pm_node_t *node, pm_scope_node_t *dest);
|
39
|
+
|
40
|
+
// The prism version and the serialization format.
|
41
|
+
PRISM_EXPORTED_FUNCTION const char * pm_version(void);
|
42
|
+
|
43
|
+
// Initialize a parser with the given start and end pointers.
|
44
|
+
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);
|
45
|
+
|
46
|
+
// Register a callback that will be called whenever prism changes the encoding it
|
47
|
+
// is using to parse based on the magic comment.
|
48
|
+
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
|
49
|
+
|
50
|
+
// Register a callback that will be called when prism encounters a magic comment
|
51
|
+
// with an encoding referenced that it doesn't understand. The callback should
|
52
|
+
// return NULL if it also doesn't understand the encoding or it should return a
|
53
|
+
// pointer to a pm_encoding_t struct that contains the functions necessary to
|
54
|
+
// parse identifiers.
|
55
|
+
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);
|
56
|
+
|
57
|
+
// Free any memory associated with the given parser.
|
58
|
+
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
|
59
|
+
|
60
|
+
// Parse the Ruby source associated with the given parser and return the tree.
|
61
|
+
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
|
62
|
+
|
63
|
+
// Pretty-prints the AST represented by the given node to the given buffer.
|
64
|
+
PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
|
65
|
+
|
66
|
+
// Serialize the AST represented by the given node to the given buffer.
|
67
|
+
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
|
68
|
+
|
69
|
+
// Parse the given source to the AST and serialize the AST to the given buffer.
|
70
|
+
PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
|
71
|
+
|
72
|
+
// Lex the given source and serialize to the given buffer.
|
73
|
+
PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer);
|
74
|
+
|
75
|
+
// Parse and serialize both the AST and the tokens represented by the given
|
76
|
+
// source to the given buffer.
|
77
|
+
PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
|
78
|
+
|
79
|
+
// Returns a string representation of the given token type.
|
80
|
+
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
|
81
|
+
|
82
|
+
#endif
|