@herb-tools/node 0.8.10 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/binding.gyp +26 -8
- package/dist/herb-node.cjs +41 -12
- package/dist/herb-node.cjs.map +1 -1
- package/dist/herb-node.esm.js +8 -1
- package/dist/herb-node.esm.js.map +1 -1
- package/dist/types/node-backend.d.ts +3 -1
- package/extension/error_helpers.cpp +395 -73
- package/extension/error_helpers.h +13 -3
- package/extension/extension_helpers.cpp +38 -35
- package/extension/extension_helpers.h +2 -2
- package/extension/herb.cpp +183 -64
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +290 -0
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/analyze/action_view/content_tag.c +70 -0
- package/extension/libherb/analyze/action_view/link_to.c +143 -0
- package/extension/libherb/analyze/action_view/registry.c +60 -0
- package/extension/libherb/analyze/action_view/tag.c +64 -0
- package/extension/libherb/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/analyze/action_view/tag_helpers.c +748 -0
- package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
- package/extension/libherb/analyze/analyze.c +882 -0
- package/extension/libherb/{include → analyze}/analyze.h +14 -4
- package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
- package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/analyze/builders.c +343 -0
- package/extension/libherb/analyze/builders.h +27 -0
- package/extension/libherb/analyze/conditional_elements.c +594 -0
- package/extension/libherb/analyze/conditional_elements.h +9 -0
- package/extension/libherb/analyze/conditional_open_tags.c +640 -0
- package/extension/libherb/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/analyze/control_type.c +250 -0
- package/extension/libherb/analyze/control_type.h +14 -0
- package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +48 -23
- package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/analyze/invalid_structures.c +193 -0
- package/extension/libherb/analyze/invalid_structures.h +11 -0
- package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
- package/extension/libherb/analyze/parse_errors.c +84 -0
- package/extension/libherb/analyze/prism_annotate.c +397 -0
- package/extension/libherb/analyze/prism_annotate.h +16 -0
- package/extension/libherb/{analyze_transform.c → analyze/transform.c} +17 -3
- package/extension/libherb/ast_node.c +17 -7
- package/extension/libherb/ast_node.h +11 -5
- package/extension/libherb/ast_nodes.c +663 -388
- package/extension/libherb/ast_nodes.h +118 -39
- package/extension/libherb/ast_pretty_print.c +191 -7
- package/extension/libherb/ast_pretty_print.h +6 -1
- package/extension/libherb/element_source.h +3 -8
- package/extension/libherb/errors.c +1077 -521
- package/extension/libherb/errors.h +149 -56
- package/extension/libherb/extract.c +145 -49
- package/extension/libherb/extract.h +21 -5
- package/extension/libherb/herb.c +52 -34
- package/extension/libherb/herb.h +18 -6
- package/extension/libherb/herb_prism_node.h +13 -0
- package/extension/libherb/html_util.c +241 -12
- package/extension/libherb/html_util.h +7 -2
- package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
- package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/include/analyze/builders.h +27 -0
- package/extension/libherb/include/analyze/conditional_elements.h +9 -0
- package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/include/analyze/control_type.h +14 -0
- package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/include/analyze/invalid_structures.h +11 -0
- package/extension/libherb/include/analyze/prism_annotate.h +16 -0
- package/extension/libherb/include/ast_node.h +11 -5
- package/extension/libherb/include/ast_nodes.h +118 -39
- package/extension/libherb/include/ast_pretty_print.h +6 -1
- package/extension/libherb/include/element_source.h +3 -8
- package/extension/libherb/include/errors.h +149 -56
- package/extension/libherb/include/extract.h +21 -5
- package/extension/libherb/include/herb.h +18 -6
- package/extension/libherb/include/herb_prism_node.h +13 -0
- package/extension/libherb/include/html_util.h +7 -2
- package/extension/libherb/include/io.h +3 -1
- package/extension/libherb/include/lex_helpers.h +29 -0
- package/extension/libherb/include/lexer.h +1 -1
- package/extension/libherb/include/lexer_peek_helpers.h +87 -13
- package/extension/libherb/include/lexer_struct.h +2 -0
- package/extension/libherb/include/location.h +2 -1
- package/extension/libherb/include/parser.h +27 -2
- package/extension/libherb/include/parser_helpers.h +19 -3
- package/extension/libherb/include/pretty_print.h +10 -5
- package/extension/libherb/include/prism_context.h +45 -0
- package/extension/libherb/include/prism_helpers.h +10 -7
- package/extension/libherb/include/prism_serialized.h +12 -0
- package/extension/libherb/include/token.h +16 -4
- package/extension/libherb/include/token_struct.h +10 -3
- package/extension/libherb/include/utf8.h +2 -1
- package/extension/libherb/include/util/hb_allocator.h +78 -0
- package/extension/libherb/include/util/hb_arena.h +6 -1
- package/extension/libherb/include/util/hb_arena_debug.h +12 -1
- package/extension/libherb/include/util/hb_array.h +7 -3
- package/extension/libherb/include/util/hb_buffer.h +6 -4
- package/extension/libherb/include/util/hb_foreach.h +79 -0
- package/extension/libherb/include/util/hb_narray.h +8 -4
- package/extension/libherb/include/util/hb_string.h +56 -9
- package/extension/libherb/include/util.h +6 -3
- package/extension/libherb/include/version.h +1 -1
- package/extension/libherb/io.c +3 -2
- package/extension/libherb/io.h +3 -1
- package/extension/libherb/lex_helpers.h +29 -0
- package/extension/libherb/lexer.c +42 -30
- package/extension/libherb/lexer.h +1 -1
- package/extension/libherb/lexer_peek_helpers.c +12 -74
- package/extension/libherb/lexer_peek_helpers.h +87 -13
- package/extension/libherb/lexer_struct.h +2 -0
- package/extension/libherb/location.c +2 -2
- package/extension/libherb/location.h +2 -1
- package/extension/libherb/main.c +53 -28
- package/extension/libherb/parser.c +783 -247
- package/extension/libherb/parser.h +27 -2
- package/extension/libherb/parser_helpers.c +110 -23
- package/extension/libherb/parser_helpers.h +19 -3
- package/extension/libherb/parser_match_tags.c +110 -49
- package/extension/libherb/pretty_print.c +29 -24
- package/extension/libherb/pretty_print.h +10 -5
- package/extension/libherb/prism_context.h +45 -0
- package/extension/libherb/prism_helpers.c +30 -27
- package/extension/libherb/prism_helpers.h +10 -7
- package/extension/libherb/prism_serialized.h +12 -0
- package/extension/libherb/ruby_parser.c +2 -0
- package/extension/libherb/token.c +151 -66
- package/extension/libherb/token.h +16 -4
- package/extension/libherb/token_matchers.c +0 -1
- package/extension/libherb/token_struct.h +10 -3
- package/extension/libherb/utf8.c +7 -6
- package/extension/libherb/utf8.h +2 -1
- package/extension/libherb/util/hb_allocator.c +341 -0
- package/extension/libherb/util/hb_allocator.h +78 -0
- package/extension/libherb/util/hb_arena.c +81 -56
- package/extension/libherb/util/hb_arena.h +6 -1
- package/extension/libherb/util/hb_arena_debug.c +32 -17
- package/extension/libherb/util/hb_arena_debug.h +12 -1
- package/extension/libherb/util/hb_array.c +30 -15
- package/extension/libherb/util/hb_array.h +7 -3
- package/extension/libherb/util/hb_buffer.c +17 -21
- package/extension/libherb/util/hb_buffer.h +6 -4
- package/extension/libherb/util/hb_foreach.h +79 -0
- package/extension/libherb/util/hb_narray.c +22 -7
- package/extension/libherb/util/hb_narray.h +8 -4
- package/extension/libherb/util/hb_string.c +49 -35
- package/extension/libherb/util/hb_string.h +56 -9
- package/extension/libherb/util.c +21 -11
- package/extension/libherb/util.h +6 -3
- package/extension/libherb/version.h +1 -1
- package/extension/libherb/visitor.c +48 -1
- package/extension/nodes.cpp +451 -6
- package/extension/nodes.h +8 -1
- package/package.json +12 -8
- package/src/node-backend.ts +11 -1
- package/dist/types/index-cjs.d.cts +0 -1
- package/extension/libherb/analyze.c +0 -1608
- package/extension/libherb/element_source.c +0 -12
- package/extension/libherb/include/util/hb_system.h +0 -9
- package/extension/libherb/util/hb_system.c +0 -30
- package/extension/libherb/util/hb_system.h +0 -9
- package/src/index-cjs.cts +0 -22
- /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
- /package/src/{index-esm.mts → index.ts} +0 -0
|
@@ -3,20 +3,18 @@
|
|
|
3
3
|
#include "include/errors.h"
|
|
4
4
|
#include "include/location.h"
|
|
5
5
|
#include "include/position.h"
|
|
6
|
-
#include "include/util.h"
|
|
7
6
|
#include "include/util/hb_buffer.h"
|
|
7
|
+
#include "include/util/hb_string.h"
|
|
8
8
|
|
|
9
9
|
#include <prism.h>
|
|
10
10
|
#include <stdlib.h>
|
|
11
11
|
#include <string.h>
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
hb_string_T pm_error_level_to_string(pm_error_level_t level) {
|
|
14
14
|
switch (level) {
|
|
15
|
-
case PM_ERROR_LEVEL_SYNTAX: return "syntax";
|
|
16
|
-
case PM_ERROR_LEVEL_ARGUMENT: return "argument";
|
|
17
|
-
case PM_ERROR_LEVEL_LOAD: return "load";
|
|
18
|
-
|
|
19
|
-
default: return "Unknown pm_error_level_t";
|
|
15
|
+
case PM_ERROR_LEVEL_SYNTAX: return hb_string("syntax");
|
|
16
|
+
case PM_ERROR_LEVEL_ARGUMENT: return hb_string("argument");
|
|
17
|
+
case PM_ERROR_LEVEL_LOAD: return hb_string("load");
|
|
20
18
|
}
|
|
21
19
|
}
|
|
22
20
|
|
|
@@ -24,7 +22,8 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
|
|
|
24
22
|
const pm_diagnostic_t* error,
|
|
25
23
|
const AST_NODE_T* node,
|
|
26
24
|
const char* source,
|
|
27
|
-
pm_parser_t* parser
|
|
25
|
+
pm_parser_t* parser,
|
|
26
|
+
hb_allocator_T* allocator
|
|
28
27
|
) {
|
|
29
28
|
size_t start_offset = (size_t) (error->location.start - parser->start);
|
|
30
29
|
size_t end_offset = (size_t) (error->location.end - parser->start);
|
|
@@ -33,25 +32,28 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
|
|
|
33
32
|
position_T end = position_from_source_with_offset(source, end_offset);
|
|
34
33
|
|
|
35
34
|
return ruby_parse_error_init(
|
|
36
|
-
error->message,
|
|
37
|
-
pm_diagnostic_id_human(error->diag_id),
|
|
35
|
+
hb_string(error->message),
|
|
36
|
+
hb_string(pm_diagnostic_id_human(error->diag_id)),
|
|
38
37
|
pm_error_level_to_string(error->level),
|
|
39
38
|
start,
|
|
40
|
-
end
|
|
39
|
+
end,
|
|
40
|
+
allocator
|
|
41
41
|
);
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions(
|
|
45
45
|
const pm_diagnostic_t* error,
|
|
46
46
|
position_T start,
|
|
47
|
-
position_T end
|
|
47
|
+
position_T end,
|
|
48
|
+
hb_allocator_T* allocator
|
|
48
49
|
) {
|
|
49
50
|
return ruby_parse_error_init(
|
|
50
|
-
error->message,
|
|
51
|
-
pm_diagnostic_id_human(error->diag_id),
|
|
51
|
+
hb_string(error->message),
|
|
52
|
+
hb_string(pm_diagnostic_id_human(error->diag_id)),
|
|
52
53
|
pm_error_level_to_string(error->level),
|
|
53
54
|
start,
|
|
54
|
-
end
|
|
55
|
+
end,
|
|
56
|
+
allocator
|
|
55
57
|
);
|
|
56
58
|
}
|
|
57
59
|
|
|
@@ -118,7 +120,7 @@ static bool search_then_keyword_location(const pm_node_t* node, void* data) {
|
|
|
118
120
|
return false;
|
|
119
121
|
}
|
|
120
122
|
|
|
121
|
-
location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source) {
|
|
123
|
+
location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source, hb_allocator_T* allocator) {
|
|
122
124
|
if (analyzed == NULL || analyzed->root == NULL || source == NULL) { return NULL; }
|
|
123
125
|
|
|
124
126
|
then_keyword_search_context_T context = { .then_keyword_loc = { .start = NULL, .end = NULL }, .found = false };
|
|
@@ -133,7 +135,7 @@ location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* sou
|
|
|
133
135
|
position_T start_position = position_from_source_with_offset(source, start_offset);
|
|
134
136
|
position_T end_position = position_from_source_with_offset(source, end_offset);
|
|
135
137
|
|
|
136
|
-
return location_create(start_position, end_position);
|
|
138
|
+
return location_create(start_position, end_position, allocator);
|
|
137
139
|
}
|
|
138
140
|
|
|
139
141
|
static location_T* parse_wrapped_and_find_then_keyword(
|
|
@@ -142,7 +144,8 @@ static location_T* parse_wrapped_and_find_then_keyword(
|
|
|
142
144
|
size_t source_length,
|
|
143
145
|
size_t prefix_length,
|
|
144
146
|
size_t adjustment_threshold,
|
|
145
|
-
size_t adjustment_amount
|
|
147
|
+
size_t adjustment_amount,
|
|
148
|
+
hb_allocator_T* allocator
|
|
146
149
|
) {
|
|
147
150
|
pm_parser_t parser;
|
|
148
151
|
pm_parser_init(&parser, (const uint8_t*) hb_buffer_value(buffer), hb_buffer_length(buffer), NULL);
|
|
@@ -177,7 +180,7 @@ static location_T* parse_wrapped_and_find_then_keyword(
|
|
|
177
180
|
position_T start_position = position_from_source_with_offset(source, start_offset);
|
|
178
181
|
position_T end_position = position_from_source_with_offset(source, end_offset);
|
|
179
182
|
|
|
180
|
-
location = location_create(start_position, end_position);
|
|
183
|
+
location = location_create(start_position, end_position, allocator);
|
|
181
184
|
}
|
|
182
185
|
}
|
|
183
186
|
}
|
|
@@ -188,14 +191,14 @@ static location_T* parse_wrapped_and_find_then_keyword(
|
|
|
188
191
|
return location;
|
|
189
192
|
}
|
|
190
193
|
|
|
191
|
-
location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause) {
|
|
194
|
+
location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause, hb_allocator_T* allocator) {
|
|
192
195
|
if (source == NULL) { return NULL; }
|
|
193
196
|
|
|
194
197
|
size_t source_length = strlen(source);
|
|
195
198
|
|
|
196
199
|
hb_buffer_T buffer;
|
|
197
200
|
|
|
198
|
-
if (!hb_buffer_init(&buffer, source_length + 16)) { return NULL; }
|
|
201
|
+
if (!hb_buffer_init(&buffer, source_length + 16, allocator)) { return NULL; }
|
|
199
202
|
|
|
200
203
|
hb_buffer_append(&buffer, "case x\n");
|
|
201
204
|
size_t prefix_length = hb_buffer_length(&buffer);
|
|
@@ -203,14 +206,14 @@ location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_cla
|
|
|
203
206
|
hb_buffer_append(&buffer, "\nend");
|
|
204
207
|
|
|
205
208
|
location_T* location =
|
|
206
|
-
parse_wrapped_and_find_then_keyword(&buffer, source, source_length, prefix_length, SIZE_MAX, 0);
|
|
209
|
+
parse_wrapped_and_find_then_keyword(&buffer, source, source_length, prefix_length, SIZE_MAX, 0, allocator);
|
|
207
210
|
|
|
208
|
-
|
|
211
|
+
hb_buffer_free(&buffer);
|
|
209
212
|
|
|
210
213
|
return location;
|
|
211
214
|
}
|
|
212
215
|
|
|
213
|
-
location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
|
|
216
|
+
location_T* get_then_keyword_location_elsif_wrapped(const char* source, hb_allocator_T* allocator) {
|
|
214
217
|
if (source == NULL) { return NULL; }
|
|
215
218
|
|
|
216
219
|
const char* elsif_position = strstr(source, "elsif");
|
|
@@ -223,7 +226,7 @@ location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
|
|
|
223
226
|
|
|
224
227
|
hb_buffer_T buffer;
|
|
225
228
|
|
|
226
|
-
if (!hb_buffer_init(&buffer, source_length + 8)) { return NULL; }
|
|
229
|
+
if (!hb_buffer_init(&buffer, source_length + 8, allocator)) { return NULL; }
|
|
227
230
|
|
|
228
231
|
hb_buffer_append_with_length(&buffer, source, elsif_offset);
|
|
229
232
|
hb_buffer_append(&buffer, "if");
|
|
@@ -232,9 +235,9 @@ location_T* get_then_keyword_location_elsif_wrapped(const char* source) {
|
|
|
232
235
|
hb_buffer_append(&buffer, "\nend");
|
|
233
236
|
|
|
234
237
|
location_T* location =
|
|
235
|
-
parse_wrapped_and_find_then_keyword(&buffer, source, source_length, 0, if_end_offset, replacement_diff);
|
|
238
|
+
parse_wrapped_and_find_then_keyword(&buffer, source, source_length, 0, if_end_offset, replacement_diff, allocator);
|
|
236
239
|
|
|
237
|
-
|
|
240
|
+
hb_buffer_free(&buffer);
|
|
238
241
|
|
|
239
242
|
return location;
|
|
240
243
|
}
|
|
@@ -1,31 +1,34 @@
|
|
|
1
1
|
#ifndef HERB_PRISM_HELPERS_H
|
|
2
2
|
#define HERB_PRISM_HELPERS_H
|
|
3
3
|
|
|
4
|
-
#include "analyzed_ruby.h"
|
|
4
|
+
#include "analyze/analyzed_ruby.h"
|
|
5
5
|
#include "ast_nodes.h"
|
|
6
6
|
#include "errors.h"
|
|
7
7
|
#include "location.h"
|
|
8
8
|
#include "position.h"
|
|
9
|
+
#include "util/hb_allocator.h"
|
|
9
10
|
|
|
10
11
|
#include <prism.h>
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
hb_string_T pm_error_level_to_string(pm_error_level_t level);
|
|
13
14
|
|
|
14
15
|
RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
|
|
15
16
|
const pm_diagnostic_t* error,
|
|
16
17
|
const AST_NODE_T* node,
|
|
17
18
|
const char* source,
|
|
18
|
-
pm_parser_t* parser
|
|
19
|
+
pm_parser_t* parser,
|
|
20
|
+
hb_allocator_T* allocator
|
|
19
21
|
);
|
|
20
22
|
|
|
21
23
|
RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions(
|
|
22
24
|
const pm_diagnostic_t* error,
|
|
23
25
|
position_T start,
|
|
24
|
-
position_T end
|
|
26
|
+
position_T end,
|
|
27
|
+
hb_allocator_T* allocator
|
|
25
28
|
);
|
|
26
29
|
|
|
27
|
-
location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source);
|
|
28
|
-
location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause);
|
|
29
|
-
location_T* get_then_keyword_location_elsif_wrapped(const char* source);
|
|
30
|
+
location_T* get_then_keyword_location(analyzed_ruby_T* analyzed, const char* source, hb_allocator_T* allocator);
|
|
31
|
+
location_T* get_then_keyword_location_wrapped(const char* source, bool is_in_clause, hb_allocator_T* allocator);
|
|
32
|
+
location_T* get_then_keyword_location_elsif_wrapped(const char* source, hb_allocator_T* allocator);
|
|
30
33
|
|
|
31
34
|
#endif
|
|
@@ -38,8 +38,10 @@ void herb_parse_ruby_to_stdout(char* source) {
|
|
|
38
38
|
|
|
39
39
|
pm_visit_node(root, herb_prism_visit, data);
|
|
40
40
|
|
|
41
|
+
#ifndef PRISM_EXCLUDE_PRETTYPRINT
|
|
41
42
|
pm_prettyprint(&buffer, &parser, root);
|
|
42
43
|
printf("%s\n", buffer.value);
|
|
44
|
+
#endif
|
|
43
45
|
|
|
44
46
|
pm_buffer_free(&buffer);
|
|
45
47
|
pm_node_destroy(&parser, root);
|
|
@@ -1,24 +1,30 @@
|
|
|
1
1
|
#include "include/token.h"
|
|
2
|
-
#include "include/lexer.h"
|
|
3
2
|
#include "include/position.h"
|
|
4
3
|
#include "include/range.h"
|
|
5
4
|
#include "include/token_struct.h"
|
|
6
5
|
#include "include/util.h"
|
|
6
|
+
#include "include/util/hb_allocator.h"
|
|
7
|
+
#include "include/util/hb_buffer.h"
|
|
8
|
+
#include "include/util/hb_string.h"
|
|
7
9
|
|
|
10
|
+
#include <stdarg.h>
|
|
8
11
|
#include <stdbool.h>
|
|
9
12
|
#include <stdio.h>
|
|
10
13
|
#include <stdlib.h>
|
|
11
14
|
#include <string.h>
|
|
12
15
|
|
|
13
16
|
token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) {
|
|
14
|
-
|
|
17
|
+
hb_allocator_T* allocator = lexer->allocator;
|
|
18
|
+
token_T* token = hb_allocator_alloc(allocator, sizeof(token_T));
|
|
19
|
+
|
|
20
|
+
if (!token) { return NULL; }
|
|
15
21
|
|
|
16
22
|
if (type == TOKEN_NEWLINE) {
|
|
17
23
|
lexer->current_line++;
|
|
18
24
|
lexer->current_column = 0;
|
|
19
25
|
}
|
|
20
26
|
|
|
21
|
-
token->value =
|
|
27
|
+
token->value = value;
|
|
22
28
|
|
|
23
29
|
token->type = type;
|
|
24
30
|
token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position };
|
|
@@ -38,65 +44,147 @@ token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer)
|
|
|
38
44
|
return token;
|
|
39
45
|
}
|
|
40
46
|
|
|
41
|
-
|
|
47
|
+
hb_string_T token_type_to_string(const token_type_T type) {
|
|
48
|
+
switch (type) {
|
|
49
|
+
case TOKEN_WHITESPACE: return hb_string("TOKEN_WHITESPACE");
|
|
50
|
+
case TOKEN_NBSP: return hb_string("TOKEN_NBSP");
|
|
51
|
+
case TOKEN_NEWLINE: return hb_string("TOKEN_NEWLINE");
|
|
52
|
+
case TOKEN_IDENTIFIER: return hb_string("TOKEN_IDENTIFIER");
|
|
53
|
+
case TOKEN_HTML_DOCTYPE: return hb_string("TOKEN_HTML_DOCTYPE");
|
|
54
|
+
case TOKEN_XML_DECLARATION: return hb_string("TOKEN_XML_DECLARATION");
|
|
55
|
+
case TOKEN_XML_DECLARATION_END: return hb_string("TOKEN_XML_DECLARATION_END");
|
|
56
|
+
case TOKEN_CDATA_START: return hb_string("TOKEN_CDATA_START");
|
|
57
|
+
case TOKEN_CDATA_END: return hb_string("TOKEN_CDATA_END");
|
|
58
|
+
case TOKEN_HTML_TAG_START: return hb_string("TOKEN_HTML_TAG_START");
|
|
59
|
+
case TOKEN_HTML_TAG_END: return hb_string("TOKEN_HTML_TAG_END");
|
|
60
|
+
case TOKEN_HTML_TAG_START_CLOSE: return hb_string("TOKEN_HTML_TAG_START_CLOSE");
|
|
61
|
+
case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("TOKEN_HTML_TAG_SELF_CLOSE");
|
|
62
|
+
case TOKEN_HTML_COMMENT_START: return hb_string("TOKEN_HTML_COMMENT_START");
|
|
63
|
+
case TOKEN_HTML_COMMENT_END: return hb_string("TOKEN_HTML_COMMENT_END");
|
|
64
|
+
case TOKEN_HTML_COMMENT_INVALID_END: return hb_string("TOKEN_HTML_COMMENT_INVALID_END");
|
|
65
|
+
case TOKEN_EQUALS: return hb_string("TOKEN_EQUALS");
|
|
66
|
+
case TOKEN_QUOTE: return hb_string("TOKEN_QUOTE");
|
|
67
|
+
case TOKEN_BACKTICK: return hb_string("TOKEN_BACKTICK");
|
|
68
|
+
case TOKEN_BACKSLASH: return hb_string("TOKEN_BACKSLASH");
|
|
69
|
+
case TOKEN_DASH: return hb_string("TOKEN_DASH");
|
|
70
|
+
case TOKEN_UNDERSCORE: return hb_string("TOKEN_UNDERSCORE");
|
|
71
|
+
case TOKEN_EXCLAMATION: return hb_string("TOKEN_EXCLAMATION");
|
|
72
|
+
case TOKEN_SLASH: return hb_string("TOKEN_SLASH");
|
|
73
|
+
case TOKEN_SEMICOLON: return hb_string("TOKEN_SEMICOLON");
|
|
74
|
+
case TOKEN_COLON: return hb_string("TOKEN_COLON");
|
|
75
|
+
case TOKEN_AT: return hb_string("TOKEN_AT");
|
|
76
|
+
case TOKEN_LT: return hb_string("TOKEN_LT");
|
|
77
|
+
case TOKEN_PERCENT: return hb_string("TOKEN_PERCENT");
|
|
78
|
+
case TOKEN_AMPERSAND: return hb_string("TOKEN_AMPERSAND");
|
|
79
|
+
case TOKEN_ERB_START: return hb_string("TOKEN_ERB_START");
|
|
80
|
+
case TOKEN_ERB_CONTENT: return hb_string("TOKEN_ERB_CONTENT");
|
|
81
|
+
case TOKEN_ERB_END: return hb_string("TOKEN_ERB_END");
|
|
82
|
+
case TOKEN_CHARACTER: return hb_string("TOKEN_CHARACTER");
|
|
83
|
+
case TOKEN_ERROR: return hb_string("TOKEN_ERROR");
|
|
84
|
+
case TOKEN_EOF: return hb_string("TOKEN_EOF");
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
hb_string_T token_type_to_friendly_string(const token_type_T type) {
|
|
42
89
|
switch (type) {
|
|
43
|
-
case TOKEN_WHITESPACE: return "
|
|
44
|
-
case TOKEN_NBSP: return "
|
|
45
|
-
case TOKEN_NEWLINE: return "
|
|
46
|
-
case TOKEN_IDENTIFIER: return "
|
|
47
|
-
case TOKEN_HTML_DOCTYPE: return "
|
|
48
|
-
case TOKEN_XML_DECLARATION: return "
|
|
49
|
-
case TOKEN_XML_DECLARATION_END: return "
|
|
50
|
-
case TOKEN_CDATA_START: return "
|
|
51
|
-
case TOKEN_CDATA_END: return "
|
|
52
|
-
case TOKEN_HTML_TAG_START: return "
|
|
53
|
-
case TOKEN_HTML_TAG_END: return "
|
|
54
|
-
case TOKEN_HTML_TAG_START_CLOSE: return "
|
|
55
|
-
case TOKEN_HTML_TAG_SELF_CLOSE: return "
|
|
56
|
-
case TOKEN_HTML_COMMENT_START: return "
|
|
57
|
-
case TOKEN_HTML_COMMENT_END: return "
|
|
58
|
-
case
|
|
59
|
-
case
|
|
60
|
-
case
|
|
61
|
-
case
|
|
62
|
-
case
|
|
63
|
-
case
|
|
64
|
-
case
|
|
65
|
-
case
|
|
66
|
-
case
|
|
67
|
-
case
|
|
68
|
-
case
|
|
69
|
-
case
|
|
70
|
-
case
|
|
71
|
-
case
|
|
72
|
-
case
|
|
73
|
-
case
|
|
74
|
-
case
|
|
75
|
-
case
|
|
76
|
-
case
|
|
77
|
-
case
|
|
90
|
+
case TOKEN_WHITESPACE: return hb_string("whitespace");
|
|
91
|
+
case TOKEN_NBSP: return hb_string("non-breaking space");
|
|
92
|
+
case TOKEN_NEWLINE: return hb_string("a newline");
|
|
93
|
+
case TOKEN_IDENTIFIER: return hb_string("an identifier");
|
|
94
|
+
case TOKEN_HTML_DOCTYPE: return hb_string("`<!DOCTYPE`");
|
|
95
|
+
case TOKEN_XML_DECLARATION: return hb_string("`<?xml`");
|
|
96
|
+
case TOKEN_XML_DECLARATION_END: return hb_string("`?>`");
|
|
97
|
+
case TOKEN_CDATA_START: return hb_string("`<![CDATA[`");
|
|
98
|
+
case TOKEN_CDATA_END: return hb_string("`]]>`");
|
|
99
|
+
case TOKEN_HTML_TAG_START: return hb_string("`<`");
|
|
100
|
+
case TOKEN_HTML_TAG_END: return hb_string("`>`");
|
|
101
|
+
case TOKEN_HTML_TAG_START_CLOSE: return hb_string("`</`");
|
|
102
|
+
case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("`/>`");
|
|
103
|
+
case TOKEN_HTML_COMMENT_START: return hb_string("`<!--`");
|
|
104
|
+
case TOKEN_HTML_COMMENT_END: return hb_string("`-->`");
|
|
105
|
+
case TOKEN_HTML_COMMENT_INVALID_END: return hb_string("`--!>`");
|
|
106
|
+
case TOKEN_EQUALS: return hb_string("`=`");
|
|
107
|
+
case TOKEN_QUOTE: return hb_string("a quote");
|
|
108
|
+
case TOKEN_BACKTICK: return hb_string("a backtick");
|
|
109
|
+
case TOKEN_BACKSLASH: return hb_string("`\\`");
|
|
110
|
+
case TOKEN_DASH: return hb_string("`-`");
|
|
111
|
+
case TOKEN_UNDERSCORE: return hb_string("`_`");
|
|
112
|
+
case TOKEN_EXCLAMATION: return hb_string("`!`");
|
|
113
|
+
case TOKEN_SLASH: return hb_string("`/`");
|
|
114
|
+
case TOKEN_SEMICOLON: return hb_string("`;`");
|
|
115
|
+
case TOKEN_COLON: return hb_string("`:`");
|
|
116
|
+
case TOKEN_AT: return hb_string("`@`");
|
|
117
|
+
case TOKEN_LT: return hb_string("`<`");
|
|
118
|
+
case TOKEN_PERCENT: return hb_string("`%`");
|
|
119
|
+
case TOKEN_AMPERSAND: return hb_string("`&`");
|
|
120
|
+
case TOKEN_ERB_START: return hb_string("`<%`");
|
|
121
|
+
case TOKEN_ERB_CONTENT: return hb_string("ERB content");
|
|
122
|
+
case TOKEN_ERB_END: return hb_string("`%>`");
|
|
123
|
+
case TOKEN_CHARACTER: return hb_string("a character");
|
|
124
|
+
case TOKEN_ERROR: return hb_string("an error token");
|
|
125
|
+
case TOKEN_EOF: return hb_string("end of file");
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
char* token_types_to_friendly_string_valist(hb_allocator_T* allocator, token_type_T first_token, va_list args) {
|
|
130
|
+
if ((int) first_token == TOKEN_SENTINEL) { return hb_allocator_strdup(allocator, ""); }
|
|
131
|
+
|
|
132
|
+
size_t count = 0;
|
|
133
|
+
hb_string_T names[32];
|
|
134
|
+
token_type_T current = first_token;
|
|
135
|
+
|
|
136
|
+
while ((int) current != TOKEN_SENTINEL && count < 32) {
|
|
137
|
+
names[count++] = token_type_to_friendly_string(current);
|
|
138
|
+
current = va_arg(args, token_type_T);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
hb_buffer_T buffer;
|
|
142
|
+
hb_buffer_init(&buffer, 128, allocator);
|
|
143
|
+
|
|
144
|
+
for (size_t i = 0; i < count; i++) {
|
|
145
|
+
hb_buffer_append_string(&buffer, names[i]);
|
|
146
|
+
|
|
147
|
+
if (i < count - 1) {
|
|
148
|
+
if (count > 2) { hb_buffer_append(&buffer, ", "); }
|
|
149
|
+
if (i == count - 2) { hb_buffer_append(&buffer, count == 2 ? " or " : "or "); }
|
|
150
|
+
}
|
|
78
151
|
}
|
|
79
152
|
|
|
80
|
-
return
|
|
153
|
+
return hb_buffer_value(&buffer);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
char* token_types_to_friendly_string_va(hb_allocator_T* allocator, token_type_T first_token, ...) {
|
|
157
|
+
va_list args;
|
|
158
|
+
va_start(args, first_token);
|
|
159
|
+
char* result = token_types_to_friendly_string_valist(allocator, first_token, args);
|
|
160
|
+
va_end(args);
|
|
161
|
+
return result;
|
|
81
162
|
}
|
|
82
163
|
|
|
83
|
-
hb_string_T token_to_string(const token_T* token) {
|
|
84
|
-
|
|
85
|
-
|
|
164
|
+
hb_string_T token_to_string(hb_allocator_T* allocator, const token_T* token) {
|
|
165
|
+
hb_string_T type_string = token_type_to_string(token->type);
|
|
166
|
+
hb_string_T template =
|
|
167
|
+
hb_string("#<Herb::Token type=\"%.*s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>");
|
|
168
|
+
|
|
169
|
+
char* string = hb_allocator_alloc(allocator, template.length + type_string.length + token->value.length + 16);
|
|
170
|
+
|
|
171
|
+
if (!string) { return HB_STRING_EMPTY; }
|
|
172
|
+
|
|
173
|
+
memset(string, 0, template.length + type_string.length + token->value.length + 16);
|
|
86
174
|
|
|
87
|
-
char* string = calloc(strlen(type_string) + strlen(template) + strlen(token->value) + 16, sizeof(char));
|
|
88
175
|
hb_string_T escaped;
|
|
89
176
|
|
|
90
177
|
if (token->type == TOKEN_EOF) {
|
|
91
|
-
escaped = hb_string(
|
|
178
|
+
escaped = hb_string(hb_allocator_strdup(allocator, "<EOF>"));
|
|
92
179
|
} else {
|
|
93
|
-
escaped = escape_newlines(
|
|
180
|
+
escaped = escape_newlines(allocator, token_value(token));
|
|
94
181
|
}
|
|
95
182
|
|
|
96
183
|
sprintf(
|
|
97
184
|
string,
|
|
98
|
-
template,
|
|
99
|
-
type_string,
|
|
185
|
+
template.data,
|
|
186
|
+
type_string.length,
|
|
187
|
+
type_string.data,
|
|
100
188
|
escaped.length,
|
|
101
189
|
escaped.data,
|
|
102
190
|
token->range.from,
|
|
@@ -107,28 +195,27 @@ hb_string_T token_to_string(const token_T* token) {
|
|
|
107
195
|
token->location.end.column
|
|
108
196
|
);
|
|
109
197
|
|
|
110
|
-
|
|
198
|
+
hb_allocator_dealloc(allocator, escaped.data);
|
|
111
199
|
|
|
112
200
|
return hb_string(string);
|
|
113
201
|
}
|
|
114
202
|
|
|
115
|
-
|
|
203
|
+
hb_string_T token_value(const token_T* token) {
|
|
204
|
+
return token->value;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
int token_type(const token_T* token) {
|
|
208
|
+
return token->type;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
token_T* token_copy(token_T* token, hb_allocator_T* allocator) {
|
|
116
212
|
if (!token) { return NULL; }
|
|
117
213
|
|
|
118
|
-
token_T* new_token =
|
|
214
|
+
token_T* new_token = hb_allocator_alloc(allocator, sizeof(token_T));
|
|
119
215
|
|
|
120
216
|
if (!new_token) { return NULL; }
|
|
121
217
|
|
|
122
|
-
|
|
123
|
-
new_token->value = herb_strdup(token->value);
|
|
124
|
-
|
|
125
|
-
if (!new_token->value) {
|
|
126
|
-
free(new_token);
|
|
127
|
-
return NULL;
|
|
128
|
-
}
|
|
129
|
-
} else {
|
|
130
|
-
new_token->value = NULL;
|
|
131
|
-
}
|
|
218
|
+
new_token->value = token->value;
|
|
132
219
|
|
|
133
220
|
new_token->type = token->type;
|
|
134
221
|
new_token->range = token->range;
|
|
@@ -138,13 +225,11 @@ token_T* token_copy(token_T* token) {
|
|
|
138
225
|
}
|
|
139
226
|
|
|
140
227
|
bool token_value_empty(const token_T* token) {
|
|
141
|
-
return token == NULL || token->value
|
|
228
|
+
return token == NULL || hb_string_is_empty(token->value);
|
|
142
229
|
}
|
|
143
230
|
|
|
144
|
-
void token_free(token_T* token) {
|
|
231
|
+
void token_free(token_T* token, hb_allocator_T* allocator) {
|
|
145
232
|
if (!token) { return; }
|
|
146
233
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
free(token);
|
|
234
|
+
hb_allocator_dealloc(allocator, token);
|
|
150
235
|
}
|
|
@@ -4,15 +4,27 @@
|
|
|
4
4
|
#include "lexer_struct.h"
|
|
5
5
|
#include "position.h"
|
|
6
6
|
#include "token_struct.h"
|
|
7
|
+
#include "util/hb_allocator.h"
|
|
7
8
|
#include "util/hb_string.h"
|
|
8
9
|
|
|
10
|
+
#include <stdarg.h>
|
|
11
|
+
|
|
9
12
|
token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
|
|
10
|
-
hb_string_T token_to_string(const token_T* token);
|
|
11
|
-
|
|
13
|
+
hb_string_T token_to_string(hb_allocator_T* allocator, const token_T* token);
|
|
14
|
+
hb_string_T token_type_to_string(token_type_T type);
|
|
15
|
+
hb_string_T token_type_to_friendly_string(token_type_T type);
|
|
16
|
+
char* token_types_to_friendly_string_va(hb_allocator_T* allocator, token_type_T first_token, ...);
|
|
17
|
+
char* token_types_to_friendly_string_valist(hb_allocator_T* allocator, token_type_T first_token, va_list args);
|
|
18
|
+
|
|
19
|
+
#define token_types_to_friendly_string(allocator, ...) \
|
|
20
|
+
token_types_to_friendly_string_va(allocator, __VA_ARGS__, TOKEN_SENTINEL)
|
|
21
|
+
|
|
22
|
+
hb_string_T token_value(const token_T* token);
|
|
23
|
+
int token_type(const token_T* token);
|
|
12
24
|
|
|
13
|
-
token_T* token_copy(token_T* token);
|
|
25
|
+
token_T* token_copy(token_T* token, hb_allocator_T* allocator);
|
|
14
26
|
|
|
15
|
-
void token_free(token_T* token);
|
|
27
|
+
void token_free(token_T* token, hb_allocator_T* allocator);
|
|
16
28
|
|
|
17
29
|
bool token_value_empty(const token_T* token);
|
|
18
30
|
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
#ifndef HERB_TOKEN_STRUCT_H
|
|
2
2
|
#define HERB_TOKEN_STRUCT_H
|
|
3
3
|
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
4
6
|
#include "location.h"
|
|
5
7
|
#include "range.h"
|
|
8
|
+
#include "util/hb_string.h"
|
|
6
9
|
|
|
7
10
|
typedef enum {
|
|
8
11
|
TOKEN_WHITESPACE, // ' '
|
|
@@ -21,8 +24,9 @@ typedef enum {
|
|
|
21
24
|
TOKEN_HTML_TAG_END, // >
|
|
22
25
|
TOKEN_HTML_TAG_SELF_CLOSE, // />
|
|
23
26
|
|
|
24
|
-
TOKEN_HTML_COMMENT_START,
|
|
25
|
-
TOKEN_HTML_COMMENT_END,
|
|
27
|
+
TOKEN_HTML_COMMENT_START, // <!--
|
|
28
|
+
TOKEN_HTML_COMMENT_END, // -->
|
|
29
|
+
TOKEN_HTML_COMMENT_INVALID_END, // --!>
|
|
26
30
|
|
|
27
31
|
TOKEN_ERB_START, // <%, <%=, <%%=, <%#, <%-, <%==, <%%
|
|
28
32
|
TOKEN_ERB_CONTENT, // Ruby Code
|
|
@@ -48,8 +52,11 @@ typedef enum {
|
|
|
48
52
|
TOKEN_EOF,
|
|
49
53
|
} token_type_T;
|
|
50
54
|
|
|
55
|
+
// Sentinel value for variadic functions
|
|
56
|
+
#define TOKEN_SENTINEL 99999999
|
|
57
|
+
|
|
51
58
|
typedef struct TOKEN_STRUCT {
|
|
52
|
-
|
|
59
|
+
hb_string_T value;
|
|
53
60
|
range_T range;
|
|
54
61
|
location_T location;
|
|
55
62
|
token_type_T type;
|
package/extension/libherb/utf8.c
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
#include "include/utf8.h"
|
|
2
|
+
#include "include/util/hb_string.h"
|
|
3
|
+
#include <stdint.h>
|
|
2
4
|
|
|
3
5
|
// UTF-8 byte patterns:
|
|
4
6
|
// 0xxxxxxx = 1 byte (ASCII)
|
|
@@ -24,19 +26,18 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
|
|
|
24
26
|
return (byte & 0xC0) == 0x80;
|
|
25
27
|
}
|
|
26
28
|
|
|
27
|
-
uint32_t utf8_sequence_length(
|
|
28
|
-
if (
|
|
29
|
+
uint32_t utf8_sequence_length(hb_string_T value) {
|
|
30
|
+
if (hb_string_is_empty(value)) { return 0; }
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
uint32_t expected_length = utf8_char_byte_length(first_byte);
|
|
32
|
+
uint32_t expected_length = utf8_char_byte_length((unsigned char) value.data[0]);
|
|
32
33
|
|
|
33
|
-
if (
|
|
34
|
+
if (value.length < expected_length) {
|
|
34
35
|
return 1; // Not enough bytes, treat as single byte
|
|
35
36
|
}
|
|
36
37
|
|
|
37
38
|
if (expected_length > 1) {
|
|
38
39
|
for (uint32_t i = 1; i < expected_length; i++) {
|
|
39
|
-
if (!utf8_is_valid_continuation_byte((unsigned char)
|
|
40
|
+
if (!utf8_is_valid_continuation_byte((unsigned char) value.data[i])) {
|
|
40
41
|
return 1; // Invalid continuation byte, treat first byte as single byte
|
|
41
42
|
}
|
|
42
43
|
}
|
package/extension/libherb/utf8.h
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
#ifndef HERB_UTF8_H
|
|
2
2
|
#define HERB_UTF8_H
|
|
3
3
|
|
|
4
|
+
#include "util/hb_string.h"
|
|
4
5
|
#include <stdbool.h>
|
|
5
6
|
#include <stdint.h>
|
|
6
7
|
#include <stdlib.h>
|
|
7
8
|
|
|
8
9
|
uint32_t utf8_char_byte_length(unsigned char first_byte);
|
|
9
|
-
uint32_t utf8_sequence_length(
|
|
10
|
+
uint32_t utf8_sequence_length(hb_string_T value);
|
|
10
11
|
bool utf8_is_valid_continuation_byte(unsigned char byte);
|
|
11
12
|
|
|
12
13
|
#endif
|