jruby-prism-parser 0.23.0.pre.SNAPSHOT-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +401 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +101 -0
- data/README.md +98 -0
- data/config.yml +2902 -0
- data/docs/build_system.md +91 -0
- data/docs/configuration.md +64 -0
- data/docs/cruby_compilation.md +27 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +121 -0
- data/docs/fuzzing.md +88 -0
- data/docs/heredocs.md +36 -0
- data/docs/javascript.md +118 -0
- data/docs/local_variable_depth.md +229 -0
- data/docs/mapping.md +117 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +98 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +43 -0
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +209 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +5098 -0
- data/ext/prism/api_pack.c +267 -0
- data/ext/prism/extconf.rb +110 -0
- data/ext/prism/extension.c +1155 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +5807 -0
- data/include/prism/defines.h +102 -0
- data/include/prism/diagnostic.h +339 -0
- data/include/prism/encoding.h +265 -0
- data/include/prism/node.h +57 -0
- data/include/prism/options.h +230 -0
- data/include/prism/pack.h +152 -0
- data/include/prism/parser.h +732 -0
- data/include/prism/prettyprint.h +26 -0
- data/include/prism/regexp.h +33 -0
- data/include/prism/util/pm_buffer.h +155 -0
- data/include/prism/util/pm_char.h +205 -0
- data/include/prism/util/pm_constant_pool.h +209 -0
- data/include/prism/util/pm_list.h +97 -0
- data/include/prism/util/pm_memchr.h +29 -0
- data/include/prism/util/pm_newline_list.h +93 -0
- data/include/prism/util/pm_state_stack.h +42 -0
- data/include/prism/util/pm_string.h +150 -0
- data/include/prism/util/pm_string_list.h +44 -0
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +46 -0
- data/include/prism/version.h +29 -0
- data/include/prism.h +289 -0
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +486 -0
- data/lib/prism/debug.rb +206 -0
- data/lib/prism/desugar_compiler.rb +207 -0
- data/lib/prism/dispatcher.rb +2150 -0
- data/lib/prism/dot_visitor.rb +4634 -0
- data/lib/prism/dsl.rb +785 -0
- data/lib/prism/ffi.rb +346 -0
- data/lib/prism/lex_compat.rb +908 -0
- data/lib/prism/mutation_compiler.rb +753 -0
- data/lib/prism/node.rb +17864 -0
- data/lib/prism/node_ext.rb +212 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +224 -0
- data/lib/prism/parse_result/comments.rb +177 -0
- data/lib/prism/parse_result/newlines.rb +64 -0
- data/lib/prism/parse_result.rb +498 -0
- data/lib/prism/pattern.rb +250 -0
- data/lib/prism/serialize.rb +1354 -0
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +37 -0
- data/lib/prism/translation/parser.rb +178 -0
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism/version.rb +3 -0
- data/lib/prism/visitor.rb +495 -0
- data/lib/prism.rb +99 -0
- data/prism.gemspec +135 -0
- data/rbi/prism.rbi +7767 -0
- data/rbi/prism_static.rbi +207 -0
- data/sig/prism.rbs +4773 -0
- data/sig/prism_static.rbs +201 -0
- data/src/diagnostic.c +400 -0
- data/src/encoding.c +5132 -0
- data/src/node.c +2786 -0
- data/src/options.c +213 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +8881 -0
- data/src/prism.c +18406 -0
- data/src/regexp.c +638 -0
- data/src/serialize.c +1554 -0
- data/src/token_type.c +700 -0
- data/src/util/pm_buffer.c +190 -0
- data/src/util/pm_char.c +318 -0
- data/src/util/pm_constant_pool.c +322 -0
- data/src/util/pm_list.c +49 -0
- data/src/util/pm_memchr.c +35 -0
- data/src/util/pm_newline_list.c +84 -0
- data/src/util/pm_state_stack.c +25 -0
- data/src/util/pm_string.c +203 -0
- data/src/util/pm_string_list.c +28 -0
- data/src/util/pm_strncasecmp.c +24 -0
- data/src/util/pm_strpbrk.c +180 -0
- metadata +156 -0
@@ -0,0 +1,203 @@
|
|
1
|
+
#include "prism/util/pm_string.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* Returns the size of the pm_string_t struct. This is necessary to allocate the
|
5
|
+
* correct amount of memory in the FFI backend.
|
6
|
+
*/
|
7
|
+
PRISM_EXPORTED_FUNCTION size_t
|
8
|
+
pm_string_sizeof(void) {
|
9
|
+
return sizeof(pm_string_t);
|
10
|
+
}
|
11
|
+
|
12
|
+
/**
|
13
|
+
* Initialize a shared string that is based on initial input.
|
14
|
+
*/
|
15
|
+
void
|
16
|
+
pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
|
17
|
+
assert(start <= end);
|
18
|
+
|
19
|
+
*string = (pm_string_t) {
|
20
|
+
.type = PM_STRING_SHARED,
|
21
|
+
.source = start,
|
22
|
+
.length = (size_t) (end - start)
|
23
|
+
};
|
24
|
+
}
|
25
|
+
|
26
|
+
/**
|
27
|
+
* Initialize an owned string that is responsible for freeing allocated memory.
|
28
|
+
*/
|
29
|
+
void
|
30
|
+
pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
|
31
|
+
*string = (pm_string_t) {
|
32
|
+
.type = PM_STRING_OWNED,
|
33
|
+
.source = source,
|
34
|
+
.length = length
|
35
|
+
};
|
36
|
+
}
|
37
|
+
|
38
|
+
/**
|
39
|
+
* Initialize a constant string that doesn't own its memory source.
|
40
|
+
*/
|
41
|
+
void
|
42
|
+
pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
|
43
|
+
*string = (pm_string_t) {
|
44
|
+
.type = PM_STRING_CONSTANT,
|
45
|
+
.source = (const uint8_t *) source,
|
46
|
+
.length = length
|
47
|
+
};
|
48
|
+
}
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Read the file indicated by the filepath parameter into source and load its
|
52
|
+
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
53
|
+
* should be freed using `pm_string_free` when it is no longer used.
|
54
|
+
*
|
55
|
+
* We want to use demand paging as much as possible in order to avoid having to
|
56
|
+
* read the entire file into memory (which could be detrimental to performance
|
57
|
+
* for large files). This means that if we're on windows we'll use
|
58
|
+
* `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
59
|
+
* `mmap`, and on other POSIX systems we'll use `read`.
|
60
|
+
*/
|
61
|
+
bool
|
62
|
+
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
63
|
+
#ifdef _WIN32
|
64
|
+
// Open the file for reading.
|
65
|
+
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
66
|
+
|
67
|
+
if (file == INVALID_HANDLE_VALUE) {
|
68
|
+
return false;
|
69
|
+
}
|
70
|
+
|
71
|
+
// Get the file size.
|
72
|
+
DWORD file_size = GetFileSize(file, NULL);
|
73
|
+
if (file_size == INVALID_FILE_SIZE) {
|
74
|
+
CloseHandle(file);
|
75
|
+
return false;
|
76
|
+
}
|
77
|
+
|
78
|
+
// If the file is empty, then we don't need to do anything else, we'll set
|
79
|
+
// the source to a constant empty string and return.
|
80
|
+
if (file_size == 0) {
|
81
|
+
CloseHandle(file);
|
82
|
+
const uint8_t source[] = "";
|
83
|
+
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
84
|
+
return true;
|
85
|
+
}
|
86
|
+
|
87
|
+
// Create a mapping of the file.
|
88
|
+
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
89
|
+
if (mapping == NULL) {
|
90
|
+
CloseHandle(file);
|
91
|
+
return false;
|
92
|
+
}
|
93
|
+
|
94
|
+
// Map the file into memory.
|
95
|
+
uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
96
|
+
CloseHandle(mapping);
|
97
|
+
CloseHandle(file);
|
98
|
+
|
99
|
+
if (source == NULL) {
|
100
|
+
return false;
|
101
|
+
}
|
102
|
+
|
103
|
+
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
|
104
|
+
return true;
|
105
|
+
#else
|
106
|
+
// Open the file for reading
|
107
|
+
int fd = open(filepath, O_RDONLY);
|
108
|
+
if (fd == -1) {
|
109
|
+
return false;
|
110
|
+
}
|
111
|
+
|
112
|
+
// Stat the file to get the file size
|
113
|
+
struct stat sb;
|
114
|
+
if (fstat(fd, &sb) == -1) {
|
115
|
+
close(fd);
|
116
|
+
return false;
|
117
|
+
}
|
118
|
+
|
119
|
+
// mmap the file descriptor to virtually get the contents
|
120
|
+
size_t size = (size_t) sb.st_size;
|
121
|
+
uint8_t *source = NULL;
|
122
|
+
|
123
|
+
if (size == 0) {
|
124
|
+
close(fd);
|
125
|
+
const uint8_t source[] = "";
|
126
|
+
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
127
|
+
return true;
|
128
|
+
}
|
129
|
+
|
130
|
+
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
131
|
+
if (source == MAP_FAILED) {
|
132
|
+
return false;
|
133
|
+
}
|
134
|
+
|
135
|
+
close(fd);
|
136
|
+
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
|
137
|
+
return true;
|
138
|
+
#endif
|
139
|
+
}
|
140
|
+
|
141
|
+
/**
|
142
|
+
* Returns the memory size associated with the string.
|
143
|
+
*/
|
144
|
+
size_t
|
145
|
+
pm_string_memsize(const pm_string_t *string) {
|
146
|
+
size_t size = sizeof(pm_string_t);
|
147
|
+
if (string->type == PM_STRING_OWNED) {
|
148
|
+
size += string->length;
|
149
|
+
}
|
150
|
+
return size;
|
151
|
+
}
|
152
|
+
|
153
|
+
/**
|
154
|
+
* Ensure the string is owned. If it is not, then reinitialize it as owned and
|
155
|
+
* copy over the previous source.
|
156
|
+
*/
|
157
|
+
void
|
158
|
+
pm_string_ensure_owned(pm_string_t *string) {
|
159
|
+
if (string->type == PM_STRING_OWNED) return;
|
160
|
+
|
161
|
+
size_t length = pm_string_length(string);
|
162
|
+
const uint8_t *source = pm_string_source(string);
|
163
|
+
|
164
|
+
uint8_t *memory = malloc(length);
|
165
|
+
if (!memory) return;
|
166
|
+
|
167
|
+
pm_string_owned_init(string, memory, length);
|
168
|
+
memcpy((void *) string->source, source, length);
|
169
|
+
}
|
170
|
+
|
171
|
+
/**
|
172
|
+
* Returns the length associated with the string.
|
173
|
+
*/
|
174
|
+
PRISM_EXPORTED_FUNCTION size_t
|
175
|
+
pm_string_length(const pm_string_t *string) {
|
176
|
+
return string->length;
|
177
|
+
}
|
178
|
+
|
179
|
+
/**
|
180
|
+
* Returns the start pointer associated with the string.
|
181
|
+
*/
|
182
|
+
PRISM_EXPORTED_FUNCTION const uint8_t *
|
183
|
+
pm_string_source(const pm_string_t *string) {
|
184
|
+
return string->source;
|
185
|
+
}
|
186
|
+
|
187
|
+
/**
|
188
|
+
* Free the associated memory of the given string.
|
189
|
+
*/
|
190
|
+
PRISM_EXPORTED_FUNCTION void
|
191
|
+
pm_string_free(pm_string_t *string) {
|
192
|
+
void *memory = (void *) string->source;
|
193
|
+
|
194
|
+
if (string->type == PM_STRING_OWNED) {
|
195
|
+
free(memory);
|
196
|
+
} else if (string->type == PM_STRING_MAPPED && string->length) {
|
197
|
+
#if defined(_WIN32)
|
198
|
+
UnmapViewOfFile(memory);
|
199
|
+
#else
|
200
|
+
munmap(memory, string->length);
|
201
|
+
#endif
|
202
|
+
}
|
203
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#include "prism/util/pm_string_list.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* Append a pm_string_t to the given string list.
|
5
|
+
*/
|
6
|
+
void
|
7
|
+
pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string) {
|
8
|
+
if (string_list->length + 1 > string_list->capacity) {
|
9
|
+
if (string_list->capacity == 0) {
|
10
|
+
string_list->capacity = 1;
|
11
|
+
} else {
|
12
|
+
string_list->capacity *= 2;
|
13
|
+
}
|
14
|
+
|
15
|
+
string_list->strings = realloc(string_list->strings, string_list->capacity * sizeof(pm_string_t));
|
16
|
+
if (string_list->strings == NULL) abort();
|
17
|
+
}
|
18
|
+
|
19
|
+
string_list->strings[string_list->length++] = *string;
|
20
|
+
}
|
21
|
+
|
22
|
+
/**
|
23
|
+
* Free the memory associated with the string list
|
24
|
+
*/
|
25
|
+
void
|
26
|
+
pm_string_list_free(pm_string_list_t *string_list) {
|
27
|
+
free(string_list->strings);
|
28
|
+
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#include "prism/util/pm_strncasecmp.h"
|
2
|
+
|
3
|
+
/**
 * Compare two strings, ignoring case, up to the given length. Returns 0 if the
 * strings are equal, a negative number if string1 is less than string2, or a
 * positive number if string1 is greater than string2.
 *
 * Comparison stops at the first differing byte, at the first NUL byte that both
 * strings share, or after `length` bytes, whichever comes first. A string that
 * ends before the other one compares as less than it (so "ab" is less than
 * "abc" when length is 3), rather than being reported as equal.
 *
 * Note that this is effectively our own implementation of strncasecmp, but it's
 * not available on all of the platforms we want to support so we're rolling it
 * here.
 *
 * @param string1 The first string to compare.
 * @param string2 The second string to compare.
 * @param length The maximum number of bytes to compare.
 * @return 0, a negative number, or a positive number as described above.
 */
int
pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        // A NUL byte folds to 0, so a string that ends early produces a
        // non-zero difference against the other string's remaining byte.
        int difference = tolower(string1[offset]) - tolower(string2[offset]);
        if (difference != 0) return difference;

        // The bytes match; if they are both the terminator then the strings
        // are equal and nothing past this point may be read.
        if (string1[offset] == '\0') return 0;
    }

    return 0;
}
|
@@ -0,0 +1,180 @@
|
|
1
|
+
#include "prism/util/pm_strpbrk.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* Add an invalid multibyte character error to the parser.
|
5
|
+
*/
|
6
|
+
static inline void
|
7
|
+
pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
8
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
|
9
|
+
}
|
10
|
+
|
11
|
+
/**
|
12
|
+
* This is the default path.
|
13
|
+
*/
|
14
|
+
static inline const uint8_t *
|
15
|
+
pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
16
|
+
size_t index = 0;
|
17
|
+
|
18
|
+
while (index < maximum) {
|
19
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
20
|
+
return source + index;
|
21
|
+
}
|
22
|
+
|
23
|
+
if (source[index] < 0x80) {
|
24
|
+
index++;
|
25
|
+
} else {
|
26
|
+
size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
|
27
|
+
|
28
|
+
if (width > 0) {
|
29
|
+
index += width;
|
30
|
+
} else if (!validate) {
|
31
|
+
index++;
|
32
|
+
} else {
|
33
|
+
// At this point we know we have an invalid multibyte character.
|
34
|
+
// We'll walk forward as far as we can until we find the next
|
35
|
+
// valid character so that we don't spam the user with a ton of
|
36
|
+
// the same kind of error.
|
37
|
+
const size_t start = index;
|
38
|
+
|
39
|
+
do {
|
40
|
+
index++;
|
41
|
+
} while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
42
|
+
|
43
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
44
|
+
}
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
return NULL;
|
49
|
+
}
|
50
|
+
|
51
|
+
/**
 * This is the path when the encoding is ASCII-8BIT. Every byte is examined on
 * its own: the first of the `maximum` bytes of `source` that is a member of
 * `charset` is returned, or NULL if there is none.
 *
 * Membership is tested with strchr, which also matches the terminating NUL of
 * `charset`, so a NUL byte in `source` counts as a match.
 */
static inline const uint8_t *
pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum; offset++) {
        if (strchr((const char *) charset, source[offset]) != NULL) {
            return source + offset;
        }
    }

    return NULL;
}
|
68
|
+
|
69
|
+
/**
|
70
|
+
* This is the slow path that does care about the encoding.
|
71
|
+
*/
|
72
|
+
static inline const uint8_t *
|
73
|
+
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
74
|
+
size_t index = 0;
|
75
|
+
|
76
|
+
while (index < maximum) {
|
77
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
78
|
+
return source + index;
|
79
|
+
}
|
80
|
+
|
81
|
+
if (source[index] < 0x80) {
|
82
|
+
index++;
|
83
|
+
} else {
|
84
|
+
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
85
|
+
|
86
|
+
if (width > 0) {
|
87
|
+
index += width;
|
88
|
+
} else if (!validate) {
|
89
|
+
index++;
|
90
|
+
} else {
|
91
|
+
// At this point we know we have an invalid multibyte character.
|
92
|
+
// We'll walk forward as far as we can until we find the next
|
93
|
+
// valid character so that we don't spam the user with a ton of
|
94
|
+
// the same kind of error.
|
95
|
+
const size_t start = index;
|
96
|
+
|
97
|
+
do {
|
98
|
+
index++;
|
99
|
+
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
100
|
+
|
101
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
return NULL;
|
107
|
+
}
|
108
|
+
|
109
|
+
/**
|
110
|
+
* This is the fast path that does not care about the encoding because we know
|
111
|
+
* the encoding only supports single-byte characters.
|
112
|
+
*/
|
113
|
+
static inline const uint8_t *
|
114
|
+
pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
115
|
+
size_t index = 0;
|
116
|
+
|
117
|
+
while (index < maximum) {
|
118
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
119
|
+
return source + index;
|
120
|
+
}
|
121
|
+
|
122
|
+
if (source[index] < 0x80 || !validate) {
|
123
|
+
index++;
|
124
|
+
} else {
|
125
|
+
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
126
|
+
|
127
|
+
if (width > 0) {
|
128
|
+
index += width;
|
129
|
+
} else {
|
130
|
+
// At this point we know we have an invalid multibyte character.
|
131
|
+
// We'll walk forward as far as we can until we find the next
|
132
|
+
// valid character so that we don't spam the user with a ton of
|
133
|
+
// the same kind of error.
|
134
|
+
const size_t start = index;
|
135
|
+
|
136
|
+
do {
|
137
|
+
index++;
|
138
|
+
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
139
|
+
|
140
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
return NULL;
|
146
|
+
}
|
147
|
+
|
148
|
+
/**
|
149
|
+
* Here we have rolled our own version of strpbrk. The standard library strpbrk
|
150
|
+
* has undefined behavior when the source string is not null-terminated. We want
|
151
|
+
* to support strings that are not null-terminated because pm_parse does not
|
152
|
+
* have the contract that the string is null-terminated. (This is desirable
|
153
|
+
* because it means the extension can call pm_parse with the result of a call to
|
154
|
+
* mmap).
|
155
|
+
*
|
156
|
+
* The standard library strpbrk also does not support passing a maximum length
|
157
|
+
* to search. We want to support this for the reason mentioned above, but we
|
158
|
+
* also don't want it to stop on null bytes. Ruby actually allows null bytes
|
159
|
+
* within strings, comments, regular expressions, etc. So we need to be able to
|
160
|
+
* skip past them.
|
161
|
+
*
|
162
|
+
* Finally, we want to support encodings wherein the charset could contain
|
163
|
+
* characters that are trailing bytes of multi-byte characters. For example, in
|
164
|
+
* Shift_JIS, the backslash character can be a trailing byte. In that case we
|
165
|
+
* need to take a slower path and iterate one multi-byte character at a time.
|
166
|
+
*/
|
167
|
+
const uint8_t *
|
168
|
+
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
|
169
|
+
if (length <= 0) {
|
170
|
+
return NULL;
|
171
|
+
} else if (!parser->encoding_changed) {
|
172
|
+
return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
|
173
|
+
} else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
|
174
|
+
return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
|
175
|
+
} else if (parser->encoding->multibyte) {
|
176
|
+
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
|
177
|
+
} else {
|
178
|
+
return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
|
179
|
+
}
|
180
|
+
}
|
metadata
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jruby-prism-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.23.0.pre.SNAPSHOT
|
5
|
+
platform: java
|
6
|
+
authors:
|
7
|
+
- JRuby Team
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-02-16 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- admin@jruby.org
|
16
|
+
executables: []
|
17
|
+
extensions:
|
18
|
+
- ext/prism/extconf.rb
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- CHANGELOG.md
|
22
|
+
- CODE_OF_CONDUCT.md
|
23
|
+
- CONTRIBUTING.md
|
24
|
+
- LICENSE.md
|
25
|
+
- Makefile
|
26
|
+
- README.md
|
27
|
+
- config.yml
|
28
|
+
- docs/build_system.md
|
29
|
+
- docs/configuration.md
|
30
|
+
- docs/cruby_compilation.md
|
31
|
+
- docs/design.md
|
32
|
+
- docs/encoding.md
|
33
|
+
- docs/fuzzing.md
|
34
|
+
- docs/heredocs.md
|
35
|
+
- docs/javascript.md
|
36
|
+
- docs/local_variable_depth.md
|
37
|
+
- docs/mapping.md
|
38
|
+
- docs/parser_translation.md
|
39
|
+
- docs/parsing_rules.md
|
40
|
+
- docs/releasing.md
|
41
|
+
- docs/ripper.md
|
42
|
+
- docs/ruby_api.md
|
43
|
+
- docs/ruby_parser_translation.md
|
44
|
+
- docs/serialization.md
|
45
|
+
- docs/testing.md
|
46
|
+
- ext/prism/api_node.c
|
47
|
+
- ext/prism/api_pack.c
|
48
|
+
- ext/prism/extconf.rb
|
49
|
+
- ext/prism/extension.c
|
50
|
+
- ext/prism/extension.h
|
51
|
+
- include/prism.h
|
52
|
+
- include/prism/ast.h
|
53
|
+
- include/prism/defines.h
|
54
|
+
- include/prism/diagnostic.h
|
55
|
+
- include/prism/encoding.h
|
56
|
+
- include/prism/node.h
|
57
|
+
- include/prism/options.h
|
58
|
+
- include/prism/pack.h
|
59
|
+
- include/prism/parser.h
|
60
|
+
- include/prism/prettyprint.h
|
61
|
+
- include/prism/regexp.h
|
62
|
+
- include/prism/util/pm_buffer.h
|
63
|
+
- include/prism/util/pm_char.h
|
64
|
+
- include/prism/util/pm_constant_pool.h
|
65
|
+
- include/prism/util/pm_list.h
|
66
|
+
- include/prism/util/pm_memchr.h
|
67
|
+
- include/prism/util/pm_newline_list.h
|
68
|
+
- include/prism/util/pm_state_stack.h
|
69
|
+
- include/prism/util/pm_string.h
|
70
|
+
- include/prism/util/pm_string_list.h
|
71
|
+
- include/prism/util/pm_strncasecmp.h
|
72
|
+
- include/prism/util/pm_strpbrk.h
|
73
|
+
- include/prism/version.h
|
74
|
+
- jruby-prism.jar
|
75
|
+
- lib/prism.rb
|
76
|
+
- lib/prism/compiler.rb
|
77
|
+
- lib/prism/debug.rb
|
78
|
+
- lib/prism/desugar_compiler.rb
|
79
|
+
- lib/prism/dispatcher.rb
|
80
|
+
- lib/prism/dot_visitor.rb
|
81
|
+
- lib/prism/dsl.rb
|
82
|
+
- lib/prism/ffi.rb
|
83
|
+
- lib/prism/lex_compat.rb
|
84
|
+
- lib/prism/mutation_compiler.rb
|
85
|
+
- lib/prism/node.rb
|
86
|
+
- lib/prism/node_ext.rb
|
87
|
+
- lib/prism/node_inspector.rb
|
88
|
+
- lib/prism/pack.rb
|
89
|
+
- lib/prism/parse_result.rb
|
90
|
+
- lib/prism/parse_result/comments.rb
|
91
|
+
- lib/prism/parse_result/newlines.rb
|
92
|
+
- lib/prism/pattern.rb
|
93
|
+
- lib/prism/serialize.rb
|
94
|
+
- lib/prism/translation.rb
|
95
|
+
- lib/prism/translation/parser.rb
|
96
|
+
- lib/prism/translation/parser/compiler.rb
|
97
|
+
- lib/prism/translation/parser/lexer.rb
|
98
|
+
- lib/prism/translation/parser/rubocop.rb
|
99
|
+
- lib/prism/translation/ripper.rb
|
100
|
+
- lib/prism/translation/ruby_parser.rb
|
101
|
+
- lib/prism/version.rb
|
102
|
+
- lib/prism/visitor.rb
|
103
|
+
- prism.gemspec
|
104
|
+
- rbi/prism.rbi
|
105
|
+
- rbi/prism_static.rbi
|
106
|
+
- sig/prism.rbs
|
107
|
+
- sig/prism_static.rbs
|
108
|
+
- src/diagnostic.c
|
109
|
+
- src/encoding.c
|
110
|
+
- src/node.c
|
111
|
+
- src/options.c
|
112
|
+
- src/pack.c
|
113
|
+
- src/prettyprint.c
|
114
|
+
- src/prism.c
|
115
|
+
- src/regexp.c
|
116
|
+
- src/serialize.c
|
117
|
+
- src/token_type.c
|
118
|
+
- src/util/pm_buffer.c
|
119
|
+
- src/util/pm_char.c
|
120
|
+
- src/util/pm_constant_pool.c
|
121
|
+
- src/util/pm_list.c
|
122
|
+
- src/util/pm_memchr.c
|
123
|
+
- src/util/pm_newline_list.c
|
124
|
+
- src/util/pm_state_stack.c
|
125
|
+
- src/util/pm_string.c
|
126
|
+
- src/util/pm_string_list.c
|
127
|
+
- src/util/pm_strncasecmp.c
|
128
|
+
- src/util/pm_strpbrk.c
|
129
|
+
homepage: https://github.com/jruby/jruby-prism
|
130
|
+
licenses:
|
131
|
+
- MIT
|
132
|
+
metadata:
|
133
|
+
allowed_push_host: https://rubygems.org
|
134
|
+
source_code_uri: https://github.com/ruby/prism
|
135
|
+
changelog_uri: https://github.com/ruby/prism/blob/main/CHANGELOG.md
|
136
|
+
post_install_message:
|
137
|
+
rdoc_options: []
|
138
|
+
require_paths:
|
139
|
+
- lib
|
140
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ">="
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: 2.7.0
|
145
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
|
+
requirements:
|
147
|
+
- - ">"
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.3.1
|
150
|
+
requirements:
|
151
|
+
- jar org.jruby, jruby-prism, 0.23.0-SNAPSHOT, :scope => :runtime
|
152
|
+
rubygems_version: 3.3.26
|
153
|
+
signing_key:
|
154
|
+
specification_version: 4
|
155
|
+
summary: Prism JRuby Parser Support
|
156
|
+
test_files: []
|