prism 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
@@ -1,3 +1,8 @@
|
|
1
|
+
/**
|
2
|
+
* @file pm_string.h
|
3
|
+
*
|
4
|
+
* A generic string type that can have various ownership semantics.
|
5
|
+
*/
|
1
6
|
#ifndef PRISM_STRING_H
|
2
7
|
#define PRISM_STRING_H
|
3
8
|
|
@@ -9,54 +14,137 @@
|
|
9
14
|
#include <stdlib.h>
|
10
15
|
#include <string.h>
|
11
16
|
|
12
|
-
//
|
17
|
+
// The following headers are necessary to read files using demand paging.
|
18
|
+
#ifdef _WIN32
|
19
|
+
#include <windows.h>
|
20
|
+
#else
|
21
|
+
#include <fcntl.h>
|
22
|
+
#include <sys/mman.h>
|
23
|
+
#include <sys/stat.h>
|
24
|
+
#include <unistd.h>
|
25
|
+
#endif
|
26
|
+
|
27
|
+
/**
|
28
|
+
* A generic string type that can have various ownership semantics.
|
29
|
+
*/
|
13
30
|
typedef struct {
|
31
|
+
/** A pointer to the start of the string. */
|
14
32
|
const uint8_t *source;
|
33
|
+
|
34
|
+
/** The length of the string in bytes of memory. */
|
15
35
|
size_t length;
|
16
|
-
|
17
|
-
|
36
|
+
|
37
|
+
/** The type of the string. This field determines how the string should be freed. */
|
38
|
+
enum {
|
39
|
+
/** This string is a constant string, and should not be freed. */
|
40
|
+
PM_STRING_CONSTANT,
|
41
|
+
|
42
|
+
/** This is a slice of another string, and should not be freed. */
|
43
|
+
PM_STRING_SHARED,
|
44
|
+
|
45
|
+
/** This string owns its memory, and should be freed using `pm_string_free`. */
|
46
|
+
PM_STRING_OWNED,
|
47
|
+
|
48
|
+
/** This string is a memory-mapped file, and should be freed using `pm_string_free`. */
|
49
|
+
PM_STRING_MAPPED
|
50
|
+
} type;
|
18
51
|
} pm_string_t;
|
19
52
|
|
20
|
-
|
53
|
+
/**
|
54
|
+
* Returns the size of the pm_string_t struct. This is necessary to allocate the
|
55
|
+
* correct amount of memory in the FFI backend.
|
56
|
+
*
|
57
|
+
* @return The size of the pm_string_t struct.
|
58
|
+
*/
|
59
|
+
PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
|
60
|
+
|
61
|
+
/**
|
62
|
+
* Defines an empty string. This is useful for initializing a string that will
|
63
|
+
* be filled in later.
|
64
|
+
*/
|
65
|
+
#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
|
21
66
|
|
22
|
-
|
67
|
+
/**
|
68
|
+
* Initialize a shared string that is based on initial input.
|
69
|
+
*
|
70
|
+
* @param string The string to initialize.
|
71
|
+
* @param start The start of the string.
|
72
|
+
* @param end The end of the string.
|
73
|
+
*/
|
23
74
|
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
|
24
75
|
|
25
|
-
|
76
|
+
/**
|
77
|
+
* Initialize an owned string that is responsible for freeing allocated memory.
|
78
|
+
*
|
79
|
+
* @param string The string to initialize.
|
80
|
+
* @param source The source of the string.
|
81
|
+
* @param length The length of the string.
|
82
|
+
*/
|
26
83
|
void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
|
27
84
|
|
28
|
-
|
85
|
+
/**
|
86
|
+
* Initialize a constant string that doesn't own its memory source.
|
87
|
+
*
|
88
|
+
* @param string The string to initialize.
|
89
|
+
* @param source The source of the string.
|
90
|
+
* @param length The length of the string.
|
91
|
+
*/
|
29
92
|
void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
|
30
93
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
94
|
+
/**
|
95
|
+
* Read the file indicated by the filepath parameter into source and load its
|
96
|
+
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
97
|
+
* should be freed using `pm_string_free` when it is no longer used.
|
98
|
+
*
|
99
|
+
* We want to use demand paging as much as possible in order to avoid having to
|
100
|
+
* read the entire file into memory (which could be detrimental to performance
|
101
|
+
* for large files). This means that if we're on windows we'll use
|
102
|
+
* `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
103
|
+
* `mmap`, and on other POSIX systems we'll use `read`.
|
104
|
+
*
|
105
|
+
* @param string The string to initialize.
|
106
|
+
* @param filepath The filepath to read.
|
107
|
+
* @return Whether or not the file was successfully mapped.
|
108
|
+
*/
|
40
109
|
PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
|
41
110
|
|
42
|
-
|
111
|
+
/**
|
112
|
+
* Returns the memory size associated with the string.
|
113
|
+
*
|
114
|
+
* @param string The string to get the memory size of.
|
115
|
+
* @return The size of the memory associated with the string.
|
116
|
+
*/
|
43
117
|
size_t pm_string_memsize(const pm_string_t *string);
|
44
118
|
|
45
|
-
|
46
|
-
|
119
|
+
/**
|
120
|
+
* Ensure the string is owned. If it is not, then reinitialize it as owned and
|
121
|
+
* copy over the previous source.
|
122
|
+
*
|
123
|
+
* @param string The string to ensure is owned.
|
124
|
+
*/
|
47
125
|
void pm_string_ensure_owned(pm_string_t *string);
|
48
126
|
|
49
|
-
|
127
|
+
/**
|
128
|
+
* Returns the length associated with the string.
|
129
|
+
*
|
130
|
+
* @param string The string to get the length of.
|
131
|
+
* @return The length of the string.
|
132
|
+
*/
|
50
133
|
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
|
51
134
|
|
52
|
-
|
135
|
+
/**
|
136
|
+
* Returns the start pointer associated with the string.
|
137
|
+
*
|
138
|
+
* @param string The string to get the start pointer of.
|
139
|
+
* @return The start pointer of the string.
|
140
|
+
*/
|
53
141
|
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
|
54
142
|
|
55
|
-
|
143
|
+
/**
|
144
|
+
* Free the associated memory of the given string.
|
145
|
+
*
|
146
|
+
* @param string The string to free.
|
147
|
+
*/
|
56
148
|
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
|
57
149
|
|
58
|
-
|
59
|
-
// correct amount of memory in the FFI backend.
|
60
|
-
PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
|
61
|
-
|
62
|
-
#endif // PRISM_STRING_H
|
150
|
+
#endif
|
@@ -1,3 +1,8 @@
|
|
1
|
+
/**
|
2
|
+
* @file pm_string_list.h
|
3
|
+
*
|
4
|
+
* A list of strings.
|
5
|
+
*/
|
1
6
|
#ifndef PRISM_STRING_LIST_H
|
2
7
|
#define PRISM_STRING_LIST_H
|
3
8
|
|
@@ -7,19 +12,33 @@
|
|
7
12
|
#include <stddef.h>
|
8
13
|
#include <stdlib.h>
|
9
14
|
|
15
|
+
/**
|
16
|
+
* A list of strings.
|
17
|
+
*/
|
10
18
|
typedef struct {
|
11
|
-
|
19
|
+
/** The length of the string list. */
|
12
20
|
size_t length;
|
21
|
+
|
22
|
+
/** The capacity of the string list that has been allocated. */
|
13
23
|
size_t capacity;
|
14
|
-
} pm_string_list_t;
|
15
24
|
|
16
|
-
|
17
|
-
|
25
|
+
/** A pointer to the start of the string list. */
|
26
|
+
pm_string_t *strings;
|
27
|
+
} pm_string_list_t;
|
18
28
|
|
19
|
-
|
29
|
+
/**
|
30
|
+
* Append a pm_string_t to the given string list.
|
31
|
+
*
|
32
|
+
* @param string_list The string list to append to.
|
33
|
+
* @param string The string to append.
|
34
|
+
*/
|
20
35
|
void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
|
21
36
|
|
22
|
-
|
37
|
+
/**
|
38
|
+
* Free the memory associated with the string list.
|
39
|
+
*
|
40
|
+
* @param string_list The string list to free.
|
41
|
+
*/
|
23
42
|
PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
|
24
43
|
|
25
44
|
#endif
|
@@ -0,0 +1,32 @@
|
|
1
|
+
/**
|
2
|
+
* @file pm_strncasecmp.h
|
3
|
+
*
|
4
|
+
* A custom strncasecmp implementation.
|
5
|
+
*/
|
6
|
+
#ifndef PRISM_STRNCASECMP_H
|
7
|
+
#define PRISM_STRNCASECMP_H
|
8
|
+
|
9
|
+
#include "prism/defines.h"
|
10
|
+
|
11
|
+
#include <ctype.h>
|
12
|
+
#include <stddef.h>
|
13
|
+
#include <stdint.h>
|
14
|
+
|
15
|
+
/**
|
16
|
+
* Compare two strings, ignoring case, up to the given length. Returns 0 if the
|
17
|
+
* strings are equal, a negative number if string1 is less than string2, or a
|
18
|
+
* positive number if string1 is greater than string2.
|
19
|
+
*
|
20
|
+
* Note that this is effectively our own implementation of strncasecmp, but it's
|
21
|
+
* not available on all of the platforms we want to support so we're rolling it
|
22
|
+
* here.
|
23
|
+
*
|
24
|
+
* @param string1 The first string to compare.
|
25
|
+
* @param string2 The second string to compare.
|
26
|
+
* @param length The maximum number of characters to compare.
|
27
|
+
* @return 0 if the strings are equal, a negative number if string1 is less than
|
28
|
+
* string2, or a positive number if string1 is greater than string2.
|
29
|
+
*/
|
30
|
+
int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
|
31
|
+
|
32
|
+
#endif
|
@@ -1,3 +1,8 @@
|
|
1
|
+
/**
|
2
|
+
* @file pm_strpbrk.h
|
3
|
+
*
|
4
|
+
* A custom strpbrk implementation.
|
5
|
+
*/
|
1
6
|
#ifndef PRISM_STRPBRK_H
|
2
7
|
#define PRISM_STRPBRK_H
|
3
8
|
|
@@ -7,23 +12,32 @@
|
|
7
12
|
#include <stddef.h>
|
8
13
|
#include <string.h>
|
9
14
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
15
|
+
/**
|
16
|
+
* Here we have rolled our own version of strpbrk. The standard library strpbrk
|
17
|
+
* has undefined behavior when the source string is not null-terminated. We want
|
18
|
+
* to support strings that are not null-terminated because pm_parse does not
|
19
|
+
* have the contract that the string is null-terminated. (This is desirable
|
20
|
+
* because it means the extension can call pm_parse with the result of a call to
|
21
|
+
* mmap).
|
22
|
+
*
|
23
|
+
* The standard library strpbrk also does not support passing a maximum length
|
24
|
+
* to search. We want to support this for the reason mentioned above, but we
|
25
|
+
* also don't want it to stop on null bytes. Ruby actually allows null bytes
|
26
|
+
* within strings, comments, regular expressions, etc. So we need to be able to
|
27
|
+
* skip past them.
|
28
|
+
*
|
29
|
+
* Finally, we want to support encodings wherein the charset could contain
|
30
|
+
* characters that are trailing bytes of multi-byte characters. For example, in
|
31
|
+
* Shift-JIS, the backslash character can be a trailing byte. In that case we
|
32
|
+
* need to take a slower path and iterate one multi-byte character at a time.
|
33
|
+
*
|
34
|
+
* @param parser The parser.
|
35
|
+
* @param source The source string.
|
36
|
+
* @param charset The charset to search for.
|
37
|
+
* @param length The maximum length to search.
|
38
|
+
* @return A pointer to the first character in the source string that is in the
|
39
|
+
* charset, or NULL if no such character exists.
|
40
|
+
*/
|
27
41
|
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
28
42
|
|
29
43
|
#endif
|
data/include/prism/version.h
CHANGED
@@ -1,4 +1,29 @@
|
|
1
|
+
/**
|
2
|
+
* @file version.h
|
3
|
+
*
|
4
|
+
* The version of the Prism library.
|
5
|
+
*/
|
6
|
+
#ifndef PRISM_VERSION_H
|
7
|
+
#define PRISM_VERSION_H
|
8
|
+
|
9
|
+
/**
|
10
|
+
* The major version of the Prism library as an int.
|
11
|
+
*/
|
1
12
|
#define PRISM_VERSION_MAJOR 0
|
2
|
-
|
13
|
+
|
14
|
+
/**
|
15
|
+
* The minor version of the Prism library as an int.
|
16
|
+
*/
|
17
|
+
#define PRISM_VERSION_MINOR 17
|
18
|
+
|
19
|
+
/**
|
20
|
+
* The patch version of the Prism library as an int.
|
21
|
+
*/
|
3
22
|
#define PRISM_VERSION_PATCH 0
|
4
|
-
|
23
|
+
|
24
|
+
/**
|
25
|
+
* The version of the Prism library as a constant string.
|
26
|
+
*/
|
27
|
+
#define PRISM_VERSION "0.17.0"
|
28
|
+
|
29
|
+
#endif
|
data/include/prism.h
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
/**
|
2
|
+
* @file prism.h
|
3
|
+
*
|
4
|
+
* The main header file for the prism parser.
|
5
|
+
*/
|
1
6
|
#ifndef PRISM_H
|
2
7
|
#define PRISM_H
|
3
8
|
|
@@ -5,10 +10,12 @@
|
|
5
10
|
#include "prism/util/pm_buffer.h"
|
6
11
|
#include "prism/util/pm_char.h"
|
7
12
|
#include "prism/util/pm_memchr.h"
|
13
|
+
#include "prism/util/pm_strncasecmp.h"
|
8
14
|
#include "prism/util/pm_strpbrk.h"
|
9
15
|
#include "prism/ast.h"
|
10
16
|
#include "prism/diagnostic.h"
|
11
17
|
#include "prism/node.h"
|
18
|
+
#include "prism/options.h"
|
12
19
|
#include "prism/pack.h"
|
13
20
|
#include "prism/parser.h"
|
14
21
|
#include "prism/prettyprint.h"
|
@@ -28,54 +35,240 @@
|
|
28
35
|
#include <strings.h>
|
29
36
|
#endif
|
30
37
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
void pm_parser_metadata(pm_parser_t *parser, const char *metadata);
|
38
|
-
|
39
|
-
// The prism version and the serialization format.
|
38
|
+
/**
|
39
|
+
* The prism version and the serialization format.
|
40
|
+
*
|
41
|
+
* @return The prism version as a constant string.
|
42
|
+
*/
|
40
43
|
PRISM_EXPORTED_FUNCTION const char * pm_version(void);
|
41
44
|
|
42
|
-
|
43
|
-
|
45
|
+
/**
|
46
|
+
* Initialize a parser with the given source and its size in bytes.
|
47
|
+
*
|
48
|
+
* @param parser The parser to initialize.
|
49
|
+
* @param source The source to parse.
|
50
|
+
* @param size The size of the source.
|
51
|
+
* @param options The optional options to use when parsing.
|
52
|
+
*/
|
53
|
+
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
|
44
54
|
|
45
|
-
|
46
|
-
|
55
|
+
/**
|
56
|
+
* Register a callback that will be called whenever prism changes the encoding
|
57
|
+
* it is using to parse based on the magic comment.
|
58
|
+
*
|
59
|
+
* @param parser The parser to register the callback with.
|
60
|
+
* @param callback The callback to register.
|
61
|
+
*/
|
47
62
|
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
|
48
63
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
64
|
+
/**
|
65
|
+
* Register a callback that will be called when prism encounters a magic comment
|
66
|
+
* with an encoding referenced that it doesn't understand. The callback should
|
67
|
+
* return NULL if it also doesn't understand the encoding or it should return a
|
68
|
+
* pointer to a pm_encoding_t struct that contains the functions necessary to
|
69
|
+
* parse identifiers.
|
70
|
+
*
|
71
|
+
* @param parser The parser to register the callback with.
|
72
|
+
* @param callback The callback to register.
|
73
|
+
*/
|
54
74
|
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);
|
55
75
|
|
56
|
-
|
76
|
+
/**
|
77
|
+
* Free any memory associated with the given parser.
|
78
|
+
*
|
79
|
+
* @param parser The parser to free.
|
80
|
+
*/
|
57
81
|
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
|
58
82
|
|
59
|
-
|
83
|
+
/**
|
84
|
+
* Parse the source associated with the given parser and return the AST.
|
85
|
+
*
|
86
|
+
* @param parser The parser to use.
|
87
|
+
* @return The AST representing the source.
|
88
|
+
*/
|
60
89
|
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
|
61
90
|
|
62
|
-
|
91
|
+
/**
|
92
|
+
* Serialize the given list of comments to the given buffer.
|
93
|
+
*
|
94
|
+
* @param parser The parser to serialize.
|
95
|
+
* @param list The list of comments to serialize.
|
96
|
+
* @param buffer The buffer to serialize to.
|
97
|
+
*/
|
98
|
+
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
|
99
|
+
|
100
|
+
/**
|
101
|
+
* Serialize the name of the encoding to the buffer.
|
102
|
+
*
|
103
|
+
* @param encoding The encoding to serialize.
|
104
|
+
* @param buffer The buffer to serialize to.
|
105
|
+
*/
|
106
|
+
void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer);
|
107
|
+
|
108
|
+
/**
|
109
|
+
* Serialize the encoding, metadata, nodes, and constant pool.
|
110
|
+
*
|
111
|
+
* @param parser The parser to serialize.
|
112
|
+
* @param node The node to serialize.
|
113
|
+
* @param buffer The buffer to serialize to.
|
114
|
+
*/
|
115
|
+
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
|
116
|
+
|
117
|
+
/**
|
118
|
+
* Serialize the AST represented by the given node to the given buffer.
|
119
|
+
*
|
120
|
+
* @param parser The parser to serialize.
|
121
|
+
* @param node The node to serialize.
|
122
|
+
* @param buffer The buffer to serialize to.
|
123
|
+
*/
|
63
124
|
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
|
64
125
|
|
65
|
-
|
66
|
-
|
126
|
+
/**
|
127
|
+
* Parse the given source to the AST and dump the AST to the given buffer.
|
128
|
+
*
|
129
|
+
* @param buffer The buffer to serialize to.
|
130
|
+
* @param source The source to parse.
|
131
|
+
* @param size The size of the source.
|
132
|
+
* @param data The optional data to pass to the parser.
|
133
|
+
*/
|
134
|
+
PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
|
67
135
|
|
68
|
-
|
69
|
-
|
136
|
+
/**
|
137
|
+
* Parse and serialize the comments in the given source to the given buffer.
|
138
|
+
*
|
139
|
+
* @param buffer The buffer to serialize to.
|
140
|
+
* @param source The source to parse.
|
141
|
+
* @param size The size of the source.
|
142
|
+
* @param data The optional data to pass to the parser.
|
143
|
+
*/
|
144
|
+
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
|
70
145
|
|
71
|
-
|
72
|
-
|
146
|
+
/**
|
147
|
+
* Lex the given source and serialize to the given buffer.
|
148
|
+
*
|
149
|
+
* @param source The source to lex.
|
150
|
+
* @param size The size of the source.
|
151
|
+
* @param buffer The buffer to serialize to.
|
152
|
+
* @param data The optional data to pass to the lexer.
|
153
|
+
*/
|
154
|
+
PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
|
73
155
|
|
74
|
-
|
75
|
-
|
76
|
-
|
156
|
+
/**
|
157
|
+
* Parse and serialize both the AST and the tokens represented by the given
|
158
|
+
* source to the given buffer.
|
159
|
+
*
|
160
|
+
* @param buffer The buffer to serialize to.
|
161
|
+
* @param source The source to parse.
|
162
|
+
* @param size The size of the source.
|
163
|
+
* @param data The optional data to pass to the parser.
|
164
|
+
*/
|
165
|
+
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
|
77
166
|
|
78
|
-
|
167
|
+
/**
|
168
|
+
* Returns a string representation of the given token type.
|
169
|
+
*
|
170
|
+
* @param token_type The token type to convert to a string.
|
171
|
+
* @return A string representation of the given token type.
|
172
|
+
*/
|
79
173
|
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
|
80
174
|
|
175
|
+
/**
|
176
|
+
* @mainpage
|
177
|
+
*
|
178
|
+
* Prism is a parser for the Ruby programming language. It is designed to be
|
179
|
+
* portable, error tolerant, and maintainable. It is written in C99 and has no
|
180
|
+
* dependencies. It is currently being integrated into
|
181
|
+
* [CRuby](https://github.com/ruby/ruby),
|
182
|
+
* [JRuby](https://github.com/jruby/jruby),
|
183
|
+
* [TruffleRuby](https://github.com/oracle/truffleruby),
|
184
|
+
* [Sorbet](https://github.com/sorbet/sorbet), and
|
185
|
+
* [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
|
186
|
+
*
|
187
|
+
* @section getting-started Getting started
|
188
|
+
*
|
189
|
+
* If you're vendoring this project and compiling it statically then as long as
|
190
|
+
* you have a C99 compiler you will be fine. If you're linking against it as
|
191
|
+
* a shared library, then you should compile with `-fvisibility=hidden` and
|
192
|
+
* `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
|
193
|
+
* visible.
|
194
|
+
*
|
195
|
+
* @section parsing Parsing
|
196
|
+
*
|
197
|
+
* In order to parse Ruby code, the structures and functions that you're going
|
198
|
+
* to want to use and be aware of are:
|
199
|
+
*
|
200
|
+
* * `pm_parser_t` - the main parser structure
|
201
|
+
* * `pm_parser_init` - initialize a parser
|
202
|
+
* * `pm_parse` - parse and return the root node
|
203
|
+
* * `pm_node_destroy` - deallocate the root node returned by `pm_parse`
|
204
|
+
* * `pm_parser_free` - free the internal memory of the parser
|
205
|
+
*
|
206
|
+
* Putting all of this together would look something like:
|
207
|
+
*
|
208
|
+
* ```c
|
209
|
+
* void parse(const uint8_t *source, size_t length) {
|
210
|
+
* pm_parser_t parser;
|
211
|
+
* pm_parser_init(&parser, source, length, NULL);
|
212
|
+
*
|
213
|
+
* pm_node_t *root = pm_parse(&parser);
|
214
|
+
* printf("PARSED!\n");
|
215
|
+
*
|
216
|
+
* pm_node_destroy(root);
|
217
|
+
* pm_parser_free(&parser);
|
218
|
+
* }
|
219
|
+
* ```
|
220
|
+
*
|
221
|
+
* All of the nodes "inherit" from `pm_node_t` by embedding those structures as
|
222
|
+
* their first member. This means you can downcast and upcast any node in the
|
223
|
+
* tree to a `pm_node_t`.
|
224
|
+
*
|
225
|
+
* @section serializing Serializing
|
226
|
+
*
|
227
|
+
* Prism provides the ability to serialize the AST and its related metadata into
|
228
|
+
* a binary format. This format is designed to be portable to different
|
229
|
+
* languages and runtimes so that you only need to make one FFI call in order to
|
230
|
+
* parse Ruby code. The structures and functions that you're going to want to
|
231
|
+
* use and be aware of are:
|
232
|
+
*
|
233
|
+
* * `pm_buffer_t` - a small buffer object that will hold the serialized AST
|
234
|
+
* * `pm_buffer_free` - free the memory associated with the buffer
|
235
|
+
* * `pm_serialize` - serialize the AST into a buffer
|
236
|
+
* * `pm_serialize_parse` - parse and serialize the AST into a buffer
|
237
|
+
*
|
238
|
+
* Putting all of this together would look something like:
|
239
|
+
*
|
240
|
+
* ```c
|
241
|
+
* void serialize(const uint8_t *source, size_t length) {
|
242
|
+
* pm_buffer_t buffer = { 0 };
|
243
|
+
*
|
244
|
+
* pm_serialize_parse(&buffer, source, length, NULL);
|
245
|
+
* printf("SERIALIZED!\n");
|
246
|
+
*
|
247
|
+
* pm_buffer_free(&buffer);
|
248
|
+
* }
|
249
|
+
* ```
|
250
|
+
*
|
251
|
+
* @section inspecting Inspecting
|
252
|
+
*
|
253
|
+
* Prism provides the ability to inspect the AST by pretty-printing nodes. You
|
254
|
+
* can do this with the `pm_prettyprint` function, which you would use like:
|
255
|
+
*
|
256
|
+
* ```c
|
257
|
+
* void prettyprint(const uint8_t *source, size_t length) {
|
258
|
+
* pm_parser_t parser;
|
259
|
+
* pm_parser_init(&parser, source, length, NULL);
|
260
|
+
*
|
261
|
+
* pm_node_t *root = pm_parse(&parser);
|
262
|
+
* pm_buffer_t buffer = { 0 };
|
263
|
+
*
|
264
|
+
* pm_prettyprint(&buffer, &parser, root);
|
265
|
+
* printf("%.*s\n", (int) buffer.length, buffer.value);
|
266
|
+
*
|
267
|
+
* pm_buffer_free(&buffer);
|
268
|
+
* pm_node_destroy(root);
|
269
|
+
* pm_parser_free(&parser);
|
270
|
+
* }
|
271
|
+
* ```
|
272
|
+
*/
|
273
|
+
|
81
274
|
#endif
|
data/lib/prism/compiler.rb
CHANGED
@@ -291,9 +291,6 @@ module Prism
|
|
291
291
|
# Compile a KeywordHashNode node
|
292
292
|
alias visit_keyword_hash_node visit_child_nodes
|
293
293
|
|
294
|
-
# Compile a KeywordParameterNode node
|
295
|
-
alias visit_keyword_parameter_node visit_child_nodes
|
296
|
-
|
297
294
|
# Compile a KeywordRestParameterNode node
|
298
295
|
alias visit_keyword_rest_parameter_node visit_child_nodes
|
299
296
|
|
@@ -354,6 +351,9 @@ module Prism
|
|
354
351
|
# Compile a NumberedReferenceReadNode node
|
355
352
|
alias visit_numbered_reference_read_node visit_child_nodes
|
356
353
|
|
354
|
+
# Compile an OptionalKeywordParameterNode node
|
355
|
+
alias visit_optional_keyword_parameter_node visit_child_nodes
|
356
|
+
|
357
357
|
# Compile an OptionalParameterNode node
|
358
358
|
alias visit_optional_parameter_node visit_child_nodes
|
359
359
|
|
@@ -393,6 +393,9 @@ module Prism
|
|
393
393
|
# Compile a RegularExpressionNode node
|
394
394
|
alias visit_regular_expression_node visit_child_nodes
|
395
395
|
|
396
|
+
# Compile a RequiredKeywordParameterNode node
|
397
|
+
alias visit_required_keyword_parameter_node visit_child_nodes
|
398
|
+
|
396
399
|
# Compile a RequiredParameterNode node
|
397
400
|
alias visit_required_parameter_node visit_child_nodes
|
398
401
|
|