herb 0.8.10-arm-linux-gnu → 0.9.1-arm-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +11 -3
- data/README.md +64 -34
- data/Rakefile +48 -40
- data/config.yml +473 -34
- data/ext/herb/error_helpers.c +535 -140
- data/ext/herb/error_helpers.h +1 -0
- data/ext/herb/extconf.rb +67 -28
- data/ext/herb/extension.c +321 -51
- data/ext/herb/extension.h +1 -0
- data/ext/herb/extension_helpers.c +24 -14
- data/ext/herb/extension_helpers.h +2 -2
- data/ext/herb/nodes.c +647 -270
- data/ext/herb/nodes.h +1 -0
- data/herb.gemspec +3 -2
- data/lib/herb/3.0/herb.so +0 -0
- data/lib/herb/3.1/herb.so +0 -0
- data/lib/herb/3.2/herb.so +0 -0
- data/lib/herb/3.3/herb.so +0 -0
- data/lib/herb/3.4/herb.so +0 -0
- data/lib/herb/4.0/herb.so +0 -0
- data/lib/herb/ast/helpers.rb +3 -3
- data/lib/herb/ast/node.rb +15 -2
- data/lib/herb/ast/nodes.rb +1530 -179
- data/lib/herb/bootstrap.rb +87 -0
- data/lib/herb/cli.rb +341 -31
- data/lib/herb/configuration.rb +248 -0
- data/lib/herb/defaults.yml +32 -0
- data/lib/herb/engine/compiler.rb +78 -11
- data/lib/herb/engine/debug_visitor.rb +13 -3
- data/lib/herb/engine/error_formatter.rb +13 -9
- data/lib/herb/engine/parser_error_overlay.rb +10 -6
- data/lib/herb/engine/validator.rb +8 -3
- data/lib/herb/engine/validators/nesting_validator.rb +2 -2
- data/lib/herb/engine.rb +119 -43
- data/lib/herb/errors.rb +808 -88
- data/lib/herb/lex_result.rb +1 -0
- data/lib/herb/location.rb +7 -3
- data/lib/herb/parse_result.rb +12 -2
- data/lib/herb/parser_options.rb +62 -0
- data/lib/herb/position.rb +1 -0
- data/lib/herb/prism_inspect.rb +120 -0
- data/lib/herb/project.rb +923 -331
- data/lib/herb/range.rb +1 -0
- data/lib/herb/token.rb +7 -1
- data/lib/herb/version.rb +1 -1
- data/lib/herb/visitor.rb +47 -2
- data/lib/herb/warnings.rb +6 -1
- data/lib/herb.rb +35 -3
- data/sig/herb/ast/helpers.rbs +2 -2
- data/sig/herb/ast/node.rbs +12 -2
- data/sig/herb/ast/nodes.rbs +773 -128
- data/sig/herb/bootstrap.rbs +31 -0
- data/sig/herb/configuration.rbs +89 -0
- data/sig/herb/engine/compiler.rbs +9 -1
- data/sig/herb/engine/debug_visitor.rbs +2 -0
- data/sig/herb/engine/validator.rbs +5 -1
- data/sig/herb/engine.rbs +21 -3
- data/sig/herb/errors.rbs +372 -63
- data/sig/herb/location.rbs +4 -0
- data/sig/herb/parse_result.rbs +4 -2
- data/sig/herb/parser_options.rbs +46 -0
- data/sig/herb/position.rbs +1 -0
- data/sig/herb/prism_inspect.rbs +28 -0
- data/sig/herb/range.rbs +1 -0
- data/sig/herb/token.rbs +6 -0
- data/sig/herb/visitor.rbs +31 -4
- data/sig/herb/warnings.rbs +6 -1
- data/sig/herb.rbs +14 -0
- data/sig/herb_c_extension.rbs +5 -2
- data/sig/rubyvm.rbs +5 -0
- data/sig/serialized_ast_errors.rbs +82 -6
- data/sig/serialized_ast_nodes.rbs +91 -6
- data/src/analyze/action_view/attribute_extraction_helpers.c +303 -0
- data/src/analyze/action_view/content_tag.c +78 -0
- data/src/analyze/action_view/link_to.c +167 -0
- data/src/analyze/action_view/registry.c +83 -0
- data/src/analyze/action_view/tag.c +70 -0
- data/src/analyze/action_view/tag_helper_node_builders.c +305 -0
- data/src/analyze/action_view/tag_helpers.c +815 -0
- data/src/analyze/action_view/turbo_frame_tag.c +88 -0
- data/src/analyze/analyze.c +885 -0
- data/src/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
- data/src/analyze/builders.c +343 -0
- data/src/analyze/conditional_elements.c +594 -0
- data/src/analyze/conditional_open_tags.c +640 -0
- data/src/analyze/control_type.c +250 -0
- data/src/{analyze_helpers.c → analyze/helpers.c} +48 -23
- data/src/analyze/invalid_structures.c +193 -0
- data/src/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
- data/src/analyze/parse_errors.c +84 -0
- data/src/analyze/prism_annotate.c +399 -0
- data/src/analyze/render_nodes.c +761 -0
- data/src/{analyze_transform.c → analyze/transform.c} +24 -3
- data/src/ast_node.c +17 -7
- data/src/ast_nodes.c +759 -387
- data/src/ast_pretty_print.c +264 -6
- data/src/errors.c +1454 -519
- data/src/extract.c +145 -49
- data/src/herb.c +52 -34
- data/src/html_util.c +241 -12
- data/src/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
- data/src/include/analyze/action_view/tag_helper_handler.h +43 -0
- data/src/include/analyze/action_view/tag_helper_node_builders.h +70 -0
- data/src/include/analyze/action_view/tag_helpers.h +38 -0
- data/src/include/{analyze.h → analyze/analyze.h} +14 -4
- data/src/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- data/src/include/analyze/builders.h +27 -0
- data/src/include/analyze/conditional_elements.h +9 -0
- data/src/include/analyze/conditional_open_tags.h +9 -0
- data/src/include/analyze/control_type.h +14 -0
- data/src/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
- data/src/include/analyze/invalid_structures.h +11 -0
- data/src/include/analyze/prism_annotate.h +16 -0
- data/src/include/analyze/render_nodes.h +11 -0
- data/src/include/ast_node.h +11 -5
- data/src/include/ast_nodes.h +154 -38
- data/src/include/ast_pretty_print.h +5 -0
- data/src/include/element_source.h +3 -8
- data/src/include/errors.h +206 -55
- data/src/include/extract.h +21 -5
- data/src/include/herb.h +18 -6
- data/src/include/herb_prism_node.h +13 -0
- data/src/include/html_util.h +7 -2
- data/src/include/io.h +3 -1
- data/src/include/lex_helpers.h +29 -0
- data/src/include/lexer.h +1 -1
- data/src/include/lexer_peek_helpers.h +87 -13
- data/src/include/lexer_struct.h +2 -0
- data/src/include/location.h +2 -1
- data/src/include/parser.h +28 -2
- data/src/include/parser_helpers.h +19 -3
- data/src/include/pretty_print.h +10 -5
- data/src/include/prism_context.h +45 -0
- data/src/include/prism_helpers.h +10 -7
- data/src/include/prism_serialized.h +12 -0
- data/src/include/token.h +16 -4
- data/src/include/token_struct.h +10 -3
- data/src/include/utf8.h +2 -1
- data/src/include/util/hb_allocator.h +78 -0
- data/src/include/util/hb_arena.h +6 -1
- data/src/include/util/hb_arena_debug.h +12 -1
- data/src/include/util/hb_array.h +7 -3
- data/src/include/util/hb_buffer.h +6 -4
- data/src/include/util/hb_foreach.h +79 -0
- data/src/include/util/hb_narray.h +8 -4
- data/src/include/util/hb_string.h +56 -9
- data/src/include/util.h +6 -3
- data/src/include/version.h +1 -1
- data/src/io.c +3 -2
- data/src/lexer.c +42 -30
- data/src/lexer_peek_helpers.c +12 -74
- data/src/location.c +2 -2
- data/src/main.c +53 -28
- data/src/parser.c +784 -247
- data/src/parser_helpers.c +110 -23
- data/src/parser_match_tags.c +129 -48
- data/src/pretty_print.c +29 -24
- data/src/prism_helpers.c +30 -27
- data/src/ruby_parser.c +2 -0
- data/src/token.c +151 -66
- data/src/token_matchers.c +0 -1
- data/src/utf8.c +7 -6
- data/src/util/hb_allocator.c +341 -0
- data/src/util/hb_arena.c +81 -56
- data/src/util/hb_arena_debug.c +32 -17
- data/src/util/hb_array.c +30 -15
- data/src/util/hb_buffer.c +17 -21
- data/src/util/hb_narray.c +22 -7
- data/src/util/hb_string.c +49 -35
- data/src/util.c +21 -11
- data/src/visitor.c +67 -0
- data/templates/ext/herb/error_helpers.c.erb +24 -11
- data/templates/ext/herb/error_helpers.h.erb +1 -0
- data/templates/ext/herb/nodes.c.erb +50 -16
- data/templates/ext/herb/nodes.h.erb +1 -0
- data/templates/java/error_helpers.c.erb +1 -1
- data/templates/java/nodes.c.erb +30 -8
- data/templates/java/org/herb/ast/Errors.java.erb +24 -1
- data/templates/java/org/herb/ast/Nodes.java.erb +80 -21
- data/templates/javascript/packages/core/src/errors.ts.erb +16 -3
- data/templates/javascript/packages/core/src/node-type-guards.ts.erb +3 -1
- data/templates/javascript/packages/core/src/nodes.ts.erb +109 -32
- data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +13 -4
- data/templates/javascript/packages/node/extension/nodes.cpp.erb +43 -4
- data/templates/lib/herb/ast/nodes.rb.erb +95 -32
- data/templates/lib/herb/errors.rb.erb +15 -3
- data/templates/lib/herb/visitor.rb.erb +2 -2
- data/templates/rust/src/ast/nodes.rs.erb +97 -44
- data/templates/rust/src/errors.rs.erb +2 -1
- data/templates/rust/src/nodes.rs.erb +168 -16
- data/templates/rust/src/union_types.rs.erb +60 -0
- data/templates/rust/src/visitor.rs.erb +81 -0
- data/templates/src/{analyze_missing_end.c.erb → analyze/missing_end.c.erb} +9 -6
- data/templates/src/{analyze_transform.c.erb → analyze/transform.c.erb} +2 -2
- data/templates/src/ast_nodes.c.erb +34 -26
- data/templates/src/ast_pretty_print.c.erb +24 -5
- data/templates/src/errors.c.erb +60 -54
- data/templates/src/include/ast_nodes.h.erb +6 -2
- data/templates/src/include/ast_pretty_print.h.erb +5 -0
- data/templates/src/include/errors.h.erb +15 -11
- data/templates/src/include/util/hb_foreach.h.erb +20 -0
- data/templates/src/parser_match_tags.c.erb +10 -4
- data/templates/src/visitor.c.erb +2 -2
- data/templates/template.rb +204 -29
- data/templates/wasm/error_helpers.cpp.erb +9 -5
- data/templates/wasm/nodes.cpp.erb +41 -4
- metadata +60 -16
- data/src/analyze.c +0 -1608
- data/src/element_source.c +0 -12
- data/src/include/util/hb_system.h +0 -9
- data/src/util/hb_system.c +0 -30
|
@@ -4,26 +4,73 @@
|
|
|
4
4
|
#include <stdbool.h>
|
|
5
5
|
#include <stddef.h>
|
|
6
6
|
#include <stdint.h>
|
|
7
|
+
#include <string.h>
|
|
8
|
+
#include <strings.h>
|
|
7
9
|
|
|
8
|
-
#include "
|
|
10
|
+
#include "hb_allocator.h"
|
|
11
|
+
#include "hb_foreach.h"
|
|
9
12
|
|
|
10
13
|
typedef struct HB_STRING_STRUCT {
|
|
11
14
|
char* data;
|
|
12
15
|
uint32_t length;
|
|
13
16
|
} hb_string_T;
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
#define HB_STRING_EMPTY ((hb_string_T) { .data = "", .length = 0 })
|
|
19
|
+
#define HB_STRING_NULL ((hb_string_T) { .data = NULL, .length = 0 })
|
|
20
|
+
|
|
21
|
+
#define HB_STRING_LITERAL(string) { .data = (char*) (string), .length = (uint32_t) (sizeof(string) - 1) }
|
|
22
|
+
|
|
23
|
+
#define HB_STRING_LIST(...) { HB_FOR_EACH(HB_STRING_LITERAL, __VA_ARGS__) }
|
|
24
|
+
|
|
25
|
+
#define hb_string(string) \
|
|
26
|
+
(__builtin_constant_p(string) \
|
|
27
|
+
? ((hb_string_T) { .data = (char*) (string), .length = (uint32_t) __builtin_strlen(string) }) \
|
|
28
|
+
: hb_string_from_c_string(string))
|
|
29
|
+
|
|
30
|
+
hb_string_T hb_string_from_c_string(const char* null_terminated_c_string);
|
|
31
|
+
|
|
32
|
+
static inline bool hb_string_is_null(hb_string_T string) {
|
|
33
|
+
return string.data == NULL;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
static inline bool hb_string_is_empty(hb_string_T string) {
|
|
37
|
+
return string.data == NULL || string.length == 0;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
static inline hb_string_T hb_string_slice(hb_string_T string, uint32_t offset) {
|
|
41
|
+
if (string.length < offset) { return HB_STRING_NULL; }
|
|
42
|
+
|
|
43
|
+
return (hb_string_T) { .data = string.data + offset, .length = string.length - offset };
|
|
44
|
+
}
|
|
22
45
|
|
|
46
|
+
static inline bool hb_string_equals(hb_string_T a, hb_string_T b) {
|
|
47
|
+
if (a.length != b.length) { return false; }
|
|
48
|
+
|
|
49
|
+
return strncmp(a.data, b.data, a.length) == 0;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
static inline bool hb_string_equals_case_insensitive(hb_string_T a, hb_string_T b) {
|
|
53
|
+
if (a.length != b.length) { return false; }
|
|
54
|
+
|
|
55
|
+
return strncasecmp(a.data, b.data, a.length) == 0;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
static inline bool hb_string_starts_with(hb_string_T string, hb_string_T expected_prefix) {
|
|
59
|
+
if (hb_string_is_empty(string) || hb_string_is_empty(expected_prefix)) { return false; }
|
|
60
|
+
if (string.length < expected_prefix.length) { return false; }
|
|
61
|
+
|
|
62
|
+
return strncmp(string.data, expected_prefix.data, expected_prefix.length) == 0;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
hb_string_T hb_string_truncate(hb_string_T string, uint32_t max_length);
|
|
23
66
|
hb_string_T hb_string_range(hb_string_T string, uint32_t from, uint32_t to);
|
|
67
|
+
hb_string_T hb_string_trim_start(hb_string_T string);
|
|
68
|
+
hb_string_T hb_string_trim_end(hb_string_T string);
|
|
69
|
+
hb_string_T hb_string_trim(hb_string_T string);
|
|
70
|
+
bool hb_string_is_blank(hb_string_T string);
|
|
71
|
+
hb_string_T hb_string_copy(hb_string_T string, hb_allocator_T* allocator);
|
|
24
72
|
|
|
25
73
|
char* hb_string_to_c_string_using_malloc(hb_string_T string);
|
|
26
|
-
|
|
27
74
|
char* hb_string_to_c_string(hb_arena_T* allocator, hb_string_T string);
|
|
28
75
|
|
|
29
76
|
#endif
|
data/src/include/util.h
CHANGED
|
@@ -5,10 +5,13 @@
|
|
|
5
5
|
#include <stdbool.h>
|
|
6
6
|
#include <stdlib.h>
|
|
7
7
|
|
|
8
|
+
struct hb_allocator;
|
|
9
|
+
|
|
8
10
|
int is_newline(int character);
|
|
11
|
+
int is_whitespace(int character);
|
|
12
|
+
hb_string_T escape_newlines(struct hb_allocator* allocator, hb_string_T input);
|
|
13
|
+
hb_string_T quoted_string(struct hb_allocator* allocator, hb_string_T input);
|
|
9
14
|
|
|
10
|
-
|
|
11
|
-
hb_string_T quoted_string(hb_string_T input);
|
|
12
|
-
char* herb_strdup(const char* s);
|
|
15
|
+
char* convert_underscores_to_dashes(const char* input);
|
|
13
16
|
|
|
14
17
|
#endif
|
data/src/include/version.h
CHANGED
data/src/io.c
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#include "include/io.h"
|
|
2
|
+
#include "include/util/hb_allocator.h"
|
|
2
3
|
#include "include/util/hb_buffer.h"
|
|
3
4
|
|
|
4
5
|
#include <errno.h>
|
|
@@ -7,7 +8,7 @@
|
|
|
7
8
|
|
|
8
9
|
#define FILE_READ_CHUNK 4096
|
|
9
10
|
|
|
10
|
-
char* herb_read_file(const char* filename) {
|
|
11
|
+
char* herb_read_file(const char* filename, struct hb_allocator* allocator) {
|
|
11
12
|
if (!filename) { return NULL; }
|
|
12
13
|
|
|
13
14
|
FILE* fp = fopen(filename, "rb");
|
|
@@ -18,7 +19,7 @@ char* herb_read_file(const char* filename) {
|
|
|
18
19
|
}
|
|
19
20
|
|
|
20
21
|
hb_buffer_T buffer;
|
|
21
|
-
hb_buffer_init(&buffer, 4096);
|
|
22
|
+
hb_buffer_init(&buffer, 4096, allocator);
|
|
22
23
|
|
|
23
24
|
char chunk[FILE_READ_CHUNK];
|
|
24
25
|
size_t bytes_read;
|
data/src/lexer.c
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
#include "include/lexer_peek_helpers.h"
|
|
2
|
+
#include "include/macros.h"
|
|
2
3
|
#include "include/token.h"
|
|
3
4
|
#include "include/utf8.h"
|
|
4
5
|
#include "include/util.h"
|
|
5
|
-
#include "include/util/hb_buffer.h"
|
|
6
6
|
#include "include/util/hb_string.h"
|
|
7
7
|
|
|
8
8
|
#include <ctype.h>
|
|
9
|
+
#include <stdint.h>
|
|
9
10
|
#include <string.h>
|
|
10
11
|
|
|
11
12
|
#define LEXER_STALL_LIMIT 5
|
|
12
13
|
|
|
14
|
+
static hb_string_T erb_open_patterns[] = HB_STRING_LIST("<%==", "<%%=", "<%graphql", "<%=", "<%#", "<%-", "<%%", "<%");
|
|
15
|
+
|
|
13
16
|
static bool lexer_eof(const lexer_T* lexer) {
|
|
14
17
|
return lexer->current_character == '\0' || lexer->stalled;
|
|
15
18
|
}
|
|
@@ -31,11 +34,13 @@ static bool lexer_stalled(lexer_T* lexer) {
|
|
|
31
34
|
return lexer->stalled;
|
|
32
35
|
}
|
|
33
36
|
|
|
34
|
-
void lexer_init(lexer_T* lexer, const char* source) {
|
|
37
|
+
void lexer_init(lexer_T* lexer, const char* source, hb_allocator_T* allocator) {
|
|
38
|
+
lexer->allocator = allocator;
|
|
39
|
+
|
|
35
40
|
if (source != NULL) {
|
|
36
41
|
lexer->source = hb_string(source);
|
|
37
42
|
} else {
|
|
38
|
-
lexer->source =
|
|
43
|
+
lexer->source = HB_STRING_EMPTY;
|
|
39
44
|
}
|
|
40
45
|
|
|
41
46
|
lexer->current_character = lexer->source.data[0];
|
|
@@ -55,11 +60,11 @@ void lexer_init(lexer_T* lexer, const char* source) {
|
|
|
55
60
|
}
|
|
56
61
|
|
|
57
62
|
token_T* lexer_error(lexer_T* lexer, const char* message) {
|
|
58
|
-
char
|
|
63
|
+
char buffer[128];
|
|
59
64
|
|
|
60
65
|
snprintf(
|
|
61
|
-
|
|
62
|
-
sizeof(
|
|
66
|
+
buffer,
|
|
67
|
+
sizeof(buffer),
|
|
63
68
|
"[Lexer] Error: %s (character '%c', line %u, col %u)\n",
|
|
64
69
|
message,
|
|
65
70
|
lexer->current_character,
|
|
@@ -67,7 +72,10 @@ token_T* lexer_error(lexer_T* lexer, const char* message) {
|
|
|
67
72
|
lexer->current_column
|
|
68
73
|
);
|
|
69
74
|
|
|
70
|
-
|
|
75
|
+
size_t length = strlen(buffer);
|
|
76
|
+
char* error_message = hb_allocator_strndup(lexer->allocator, buffer, length);
|
|
77
|
+
|
|
78
|
+
return token_init((hb_string_T) { .data = error_message, .length = (uint32_t) length }, TOKEN_ERROR, lexer);
|
|
71
79
|
}
|
|
72
80
|
|
|
73
81
|
static void lexer_advance(lexer_T* lexer) {
|
|
@@ -79,8 +87,8 @@ static void lexer_advance(lexer_T* lexer) {
|
|
|
79
87
|
}
|
|
80
88
|
}
|
|
81
89
|
|
|
82
|
-
static void lexer_advance_utf8_bytes(lexer_T* lexer,
|
|
83
|
-
if (byte_count
|
|
90
|
+
static void lexer_advance_utf8_bytes(lexer_T* lexer, uint32_t byte_count) {
|
|
91
|
+
if (byte_count == 0) { return; }
|
|
84
92
|
|
|
85
93
|
if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
|
|
86
94
|
if (!is_newline(lexer->current_character)) { lexer->current_column++; }
|
|
@@ -120,19 +128,17 @@ static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type
|
|
|
120
128
|
}
|
|
121
129
|
|
|
122
130
|
static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
|
|
123
|
-
|
|
124
|
-
buffer[0] = lexer->current_character;
|
|
125
|
-
buffer[1] = '\0';
|
|
126
|
-
|
|
127
|
-
return lexer_advance_with(lexer, hb_string(buffer), type);
|
|
131
|
+
return lexer_advance_with_next(lexer, 1, type);
|
|
128
132
|
}
|
|
129
133
|
|
|
130
134
|
static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
|
|
131
|
-
|
|
135
|
+
uint32_t char_byte_length = utf8_sequence_length(hb_string_slice(lexer->source, lexer->current_position));
|
|
136
|
+
|
|
132
137
|
if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
|
|
138
|
+
|
|
133
139
|
uint32_t start_position = lexer->current_position;
|
|
134
140
|
|
|
135
|
-
for (
|
|
141
|
+
for (uint32_t i = 0; i < char_byte_length; i++) {
|
|
136
142
|
if (lexer->current_position + i >= lexer->source.length) { return lexer_advance_current(lexer, type); }
|
|
137
143
|
}
|
|
138
144
|
|
|
@@ -171,7 +177,8 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
|
|
|
171
177
|
|
|
172
178
|
while ((isalnum(lexer->current_character) || lexer->current_character == '-' || lexer->current_character == '_'
|
|
173
179
|
|| lexer->current_character == ':')
|
|
174
|
-
&& !lexer_peek_for_html_comment_end(lexer, 0) && !
|
|
180
|
+
&& !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_peek_for_html_comment_invalid_end(lexer, 0)
|
|
181
|
+
&& !lexer_eof(lexer)) {
|
|
175
182
|
|
|
176
183
|
lexer_advance(lexer);
|
|
177
184
|
}
|
|
@@ -185,13 +192,9 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
|
|
|
185
192
|
// ===== ERB Parsing
|
|
186
193
|
|
|
187
194
|
static token_T* lexer_parse_erb_open(lexer_T* lexer) {
|
|
188
|
-
hb_string_T erb_patterns[] = { hb_string("<%=="), hb_string("<%%="), hb_string("<%="), hb_string("<%#"),
|
|
189
|
-
hb_string("<%-"), hb_string("<%%"), hb_string("<%graphql"), hb_string("<%") };
|
|
190
|
-
|
|
191
195
|
lexer->state = STATE_ERB_CONTENT;
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
token_T* match = lexer_match_and_advance(lexer, erb_patterns[i], TOKEN_ERB_START);
|
|
196
|
+
for (size_t i = 0; i < sizeof(erb_open_patterns) / sizeof(erb_open_patterns[0]); i++) {
|
|
197
|
+
token_T* match = lexer_match_and_advance(lexer, erb_open_patterns[i], TOKEN_ERB_START);
|
|
195
198
|
if (match) { return match; }
|
|
196
199
|
}
|
|
197
200
|
|
|
@@ -203,11 +206,17 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
|
|
|
203
206
|
|
|
204
207
|
while (!lexer_peek_erb_end(lexer, 0)) {
|
|
205
208
|
if (lexer_eof(lexer)) {
|
|
206
|
-
token_T* token =
|
|
207
|
-
hb_string_range(lexer->source, start_position, lexer->current_position),
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
209
|
+
token_T* token =
|
|
210
|
+
token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
|
|
211
|
+
|
|
212
|
+
return token;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
if (lexer_peek_erb_start(lexer, 0)) {
|
|
216
|
+
lexer->state = STATE_DATA;
|
|
217
|
+
|
|
218
|
+
token_T* token =
|
|
219
|
+
token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
|
|
211
220
|
|
|
212
221
|
return token;
|
|
213
222
|
}
|
|
@@ -244,7 +253,7 @@ static token_T* lexer_parse_erb_close(lexer_T* lexer) {
|
|
|
244
253
|
// ===== Tokenizing Function
|
|
245
254
|
|
|
246
255
|
token_T* lexer_next_token(lexer_T* lexer) {
|
|
247
|
-
if (lexer_eof(lexer)) { return token_init(
|
|
256
|
+
if (lexer_eof(lexer)) { return token_init(HB_STRING_EMPTY, TOKEN_EOF, lexer); }
|
|
248
257
|
if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }
|
|
249
258
|
|
|
250
259
|
if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }
|
|
@@ -302,7 +311,10 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
|
302
311
|
}
|
|
303
312
|
|
|
304
313
|
case '-': {
|
|
305
|
-
token_T* token = lexer_match_and_advance(lexer, hb_string("
|
|
314
|
+
token_T* token = lexer_match_and_advance(lexer, hb_string("--!>"), TOKEN_HTML_COMMENT_INVALID_END);
|
|
315
|
+
if (token) { return token; }
|
|
316
|
+
|
|
317
|
+
token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END);
|
|
306
318
|
return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
|
|
307
319
|
}
|
|
308
320
|
|
data/src/lexer_peek_helpers.c
CHANGED
|
@@ -1,22 +1,10 @@
|
|
|
1
1
|
#include "include/lexer_peek_helpers.h"
|
|
2
2
|
#include "include/lexer.h"
|
|
3
|
-
#include "include/lexer_struct.h"
|
|
4
|
-
#include "include/macros.h"
|
|
5
3
|
#include "include/token.h"
|
|
6
|
-
#include "include/util/hb_string.h"
|
|
7
4
|
|
|
8
5
|
#include <ctype.h>
|
|
9
|
-
#include <stdbool.h>
|
|
10
6
|
|
|
11
|
-
|
|
12
|
-
return lexer->source.data[MAX(lexer->current_position - offset, 0)];
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
char lexer_peek(const lexer_T* lexer, uint32_t offset) {
|
|
16
|
-
return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)];
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, const bool case_insensitive) {
|
|
7
|
+
static bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, bool case_insensitive) {
|
|
20
8
|
hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position + offset);
|
|
21
9
|
remaining_source.length = MIN(pattern.length, remaining_source.length);
|
|
22
10
|
|
|
@@ -47,31 +35,19 @@ bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset) {
|
|
|
47
35
|
return lexer_peek_for(lexer, offset, hb_string("<!--"), false);
|
|
48
36
|
}
|
|
49
37
|
|
|
50
|
-
bool
|
|
51
|
-
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) {
|
|
55
|
-
return lexer_peek_for(lexer, offset, hb_string("%>"), false);
|
|
56
|
-
}
|
|
38
|
+
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
|
|
39
|
+
if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
|
|
57
40
|
|
|
58
|
-
|
|
59
|
-
return lexer_peek_for(lexer, offset, hb_string("-%>"), false);
|
|
60
|
-
}
|
|
41
|
+
uint32_t position = offset + 2;
|
|
61
42
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
43
|
+
while (lexer_peek(lexer, position) == ' ' || lexer_peek(lexer, position) == '\t'
|
|
44
|
+
|| lexer_peek(lexer, position) == '\n' || lexer_peek(lexer, position) == '\r') {
|
|
45
|
+
position++;
|
|
46
|
+
}
|
|
65
47
|
|
|
66
|
-
|
|
67
|
-
return lexer_peek_for(lexer, offset, hb_string("=%>"), false);
|
|
68
|
-
}
|
|
48
|
+
char character = lexer_peek(lexer, position);
|
|
69
49
|
|
|
70
|
-
|
|
71
|
-
return (
|
|
72
|
-
lexer_peek_erb_close_tag(lexer, offset) || lexer_peek_erb_dash_close_tag(lexer, offset)
|
|
73
|
-
|| lexer_peek_erb_percent_close_tag(lexer, offset) || lexer_peek_erb_equals_close_tag(lexer, offset)
|
|
74
|
-
);
|
|
50
|
+
return isalpha(character) || character == '_';
|
|
75
51
|
}
|
|
76
52
|
|
|
77
53
|
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type) {
|
|
@@ -84,13 +60,13 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
|
|
|
84
60
|
token_T* token = lexer_next_token(lexer);
|
|
85
61
|
|
|
86
62
|
while (token && (token->type == TOKEN_WHITESPACE || token->type == TOKEN_NEWLINE)) {
|
|
87
|
-
token_free(token);
|
|
63
|
+
token_free(token, lexer->allocator);
|
|
88
64
|
token = lexer_next_token(lexer);
|
|
89
65
|
}
|
|
90
66
|
|
|
91
67
|
bool result = (token && token->type == token_type);
|
|
92
68
|
|
|
93
|
-
if (token) { token_free(token); }
|
|
69
|
+
if (token) { token_free(token, lexer->allocator); }
|
|
94
70
|
|
|
95
71
|
lexer->current_position = saved_position;
|
|
96
72
|
lexer->current_line = saved_line;
|
|
@@ -100,41 +76,3 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
|
|
|
100
76
|
|
|
101
77
|
return result;
|
|
102
78
|
}
|
|
103
|
-
|
|
104
|
-
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
|
|
105
|
-
if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
|
|
106
|
-
|
|
107
|
-
uint32_t pos = offset + 2;
|
|
108
|
-
|
|
109
|
-
while (lexer_peek(lexer, pos) == ' ' || lexer_peek(lexer, pos) == '\t' || lexer_peek(lexer, pos) == '\n'
|
|
110
|
-
|| lexer_peek(lexer, pos) == '\r') {
|
|
111
|
-
pos++;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
char c = lexer_peek(lexer, pos);
|
|
115
|
-
|
|
116
|
-
return isalpha(c) || c == '_';
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
lexer_state_snapshot_T lexer_save_state(lexer_T* lexer) {
|
|
120
|
-
lexer_state_snapshot_T snapshot = { .position = lexer->current_position,
|
|
121
|
-
.line = lexer->current_line,
|
|
122
|
-
.column = lexer->current_column,
|
|
123
|
-
.previous_position = lexer->previous_position,
|
|
124
|
-
.previous_line = lexer->previous_line,
|
|
125
|
-
.previous_column = lexer->previous_column,
|
|
126
|
-
.current_character = lexer->current_character,
|
|
127
|
-
.state = lexer->state };
|
|
128
|
-
return snapshot;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot) {
|
|
132
|
-
lexer->current_position = snapshot.position;
|
|
133
|
-
lexer->current_line = snapshot.line;
|
|
134
|
-
lexer->current_column = snapshot.column;
|
|
135
|
-
lexer->previous_position = snapshot.previous_position;
|
|
136
|
-
lexer->previous_line = snapshot.previous_line;
|
|
137
|
-
lexer->previous_column = snapshot.previous_column;
|
|
138
|
-
lexer->current_character = snapshot.current_character;
|
|
139
|
-
lexer->state = snapshot.state;
|
|
140
|
-
}
|
data/src/location.c
CHANGED
|
@@ -17,8 +17,8 @@ void location_from_positions(location_T* location, position_T start, position_T
|
|
|
17
17
|
location->end = end;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
location_T* location_create(position_T start, position_T end) {
|
|
21
|
-
location_T* location =
|
|
20
|
+
location_T* location_create(position_T start, position_T end, hb_allocator_T* allocator) {
|
|
21
|
+
location_T* location = hb_allocator_alloc(allocator, sizeof(location_T));
|
|
22
22
|
|
|
23
23
|
if (location != NULL) {
|
|
24
24
|
location->start = start;
|
data/src/main.c
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
1
|
#define _POSIX_C_SOURCE 199309L // Enables `clock_gettime()`
|
|
2
2
|
|
|
3
|
-
#include "include/analyze.h"
|
|
4
3
|
#include "include/ast_node.h"
|
|
5
4
|
#include "include/ast_nodes.h"
|
|
6
|
-
|
|
5
|
+
|
|
6
|
+
#ifndef HERB_EXCLUDE_PRETTYPRINT
|
|
7
|
+
# include "include/ast_pretty_print.h"
|
|
8
|
+
#endif
|
|
9
|
+
|
|
7
10
|
#include "include/extract.h"
|
|
8
11
|
#include "include/herb.h"
|
|
9
12
|
#include "include/io.h"
|
|
13
|
+
#include "include/lex_helpers.h"
|
|
14
|
+
#include "include/macros.h"
|
|
10
15
|
#include "include/ruby_parser.h"
|
|
16
|
+
#include "include/util/hb_allocator.h"
|
|
17
|
+
#include "include/util/hb_arena.h"
|
|
18
|
+
#include "include/util/hb_arena_debug.h"
|
|
11
19
|
#include "include/util/hb_buffer.h"
|
|
12
20
|
#include "include/util/string.h"
|
|
13
21
|
|
|
@@ -38,7 +46,7 @@ int main(const int argc, char* argv[]) {
|
|
|
38
46
|
if (argc < 2) {
|
|
39
47
|
puts("./herb [command] [options]\n");
|
|
40
48
|
|
|
41
|
-
puts("Herb 🌿 Powerful and seamless HTML-aware ERB
|
|
49
|
+
puts("Herb 🌿 Powerful and seamless HTML-aware ERB toolchain.\n");
|
|
42
50
|
|
|
43
51
|
puts("./herb lex [file] - Lex a file");
|
|
44
52
|
puts("./herb parse [file] - Parse a file");
|
|
@@ -54,74 +62,90 @@ int main(const int argc, char* argv[]) {
|
|
|
54
62
|
return EXIT_FAILURE;
|
|
55
63
|
}
|
|
56
64
|
|
|
57
|
-
|
|
65
|
+
hb_allocator_T malloc_allocator = hb_allocator_with_malloc();
|
|
66
|
+
char* source = herb_read_file(argv[2], &malloc_allocator);
|
|
58
67
|
|
|
59
|
-
|
|
68
|
+
hb_allocator_T allocator;
|
|
69
|
+
if (!hb_allocator_init(&allocator, HB_ALLOCATOR_ARENA)) {
|
|
70
|
+
fprintf(stderr, "Failed to initialize allocator\n");
|
|
71
|
+
hb_allocator_dealloc(&malloc_allocator, source);
|
|
72
|
+
return EXIT_FAILURE;
|
|
73
|
+
}
|
|
60
74
|
|
|
61
|
-
|
|
75
|
+
hb_buffer_T output;
|
|
76
|
+
if (!hb_buffer_init(&output, 4096, &allocator)) { return 1; }
|
|
62
77
|
|
|
63
78
|
struct timespec start, end;
|
|
64
79
|
clock_gettime(CLOCK_MONOTONIC, &start);
|
|
65
80
|
|
|
81
|
+
int silent = 0;
|
|
82
|
+
if (argc > 3 && string_equals(argv[3], "--silent")) { silent = 1; }
|
|
83
|
+
|
|
66
84
|
if (string_equals(argv[1], "lex")) {
|
|
67
|
-
herb_lex_to_buffer(source, &output);
|
|
85
|
+
herb_lex_to_buffer(source, &output, &allocator);
|
|
68
86
|
clock_gettime(CLOCK_MONOTONIC, &end);
|
|
69
87
|
|
|
88
|
+
if (!silent) { hb_arena_print_stats((hb_arena_T*) allocator.context); }
|
|
89
|
+
|
|
70
90
|
puts(output.value);
|
|
71
91
|
print_time_diff(start, end, "lexing");
|
|
72
92
|
|
|
73
|
-
|
|
74
|
-
|
|
93
|
+
hb_buffer_free(&output);
|
|
94
|
+
hb_allocator_destroy(&allocator);
|
|
95
|
+
hb_allocator_dealloc(&malloc_allocator, source);
|
|
75
96
|
|
|
76
97
|
return EXIT_SUCCESS;
|
|
77
98
|
}
|
|
78
99
|
|
|
79
100
|
if (string_equals(argv[1], "parse")) {
|
|
80
|
-
AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL);
|
|
81
|
-
|
|
82
|
-
herb_analyze_parse_tree(root, source);
|
|
101
|
+
AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, &allocator);
|
|
83
102
|
|
|
84
103
|
clock_gettime(CLOCK_MONOTONIC, &end);
|
|
85
104
|
|
|
86
|
-
int silent = 0;
|
|
87
|
-
if (argc > 3 && string_equals(argv[3], "--silent")) { silent = 1; }
|
|
88
|
-
|
|
89
105
|
if (!silent) {
|
|
106
|
+
hb_arena_print_stats((hb_arena_T*) allocator.context);
|
|
107
|
+
|
|
108
|
+
#ifndef HERB_EXCLUDE_PRETTYPRINT
|
|
90
109
|
ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
|
|
91
110
|
puts(output.value);
|
|
111
|
+
#endif
|
|
92
112
|
|
|
93
113
|
print_time_diff(start, end, "parsing");
|
|
94
114
|
}
|
|
95
115
|
|
|
96
|
-
ast_node_free((AST_NODE_T*) root);
|
|
97
|
-
|
|
98
|
-
|
|
116
|
+
ast_node_free((AST_NODE_T*) root, &allocator);
|
|
117
|
+
|
|
118
|
+
hb_buffer_free(&output);
|
|
119
|
+
hb_allocator_destroy(&allocator);
|
|
120
|
+
hb_allocator_dealloc(&malloc_allocator, source);
|
|
99
121
|
|
|
100
122
|
return EXIT_SUCCESS;
|
|
101
123
|
}
|
|
102
124
|
|
|
103
125
|
if (string_equals(argv[1], "ruby")) {
|
|
104
|
-
herb_extract_ruby_to_buffer(source, &output);
|
|
126
|
+
herb_extract_ruby_to_buffer(source, &output, &allocator);
|
|
105
127
|
clock_gettime(CLOCK_MONOTONIC, &end);
|
|
106
128
|
|
|
107
129
|
puts(output.value);
|
|
108
130
|
print_time_diff(start, end, "extracting Ruby");
|
|
109
131
|
|
|
110
|
-
|
|
111
|
-
|
|
132
|
+
hb_buffer_free(&output);
|
|
133
|
+
hb_allocator_destroy(&allocator);
|
|
134
|
+
hb_allocator_dealloc(&malloc_allocator, source);
|
|
112
135
|
|
|
113
136
|
return EXIT_SUCCESS;
|
|
114
137
|
}
|
|
115
138
|
|
|
116
139
|
if (string_equals(argv[1], "html")) {
|
|
117
|
-
herb_extract_html_to_buffer(source, &output);
|
|
140
|
+
herb_extract_html_to_buffer(source, &output, &allocator);
|
|
118
141
|
clock_gettime(CLOCK_MONOTONIC, &end);
|
|
119
142
|
|
|
120
143
|
puts(output.value);
|
|
121
144
|
print_time_diff(start, end, "extracting HTML");
|
|
122
145
|
|
|
123
|
-
|
|
124
|
-
|
|
146
|
+
hb_buffer_free(&output);
|
|
147
|
+
hb_allocator_destroy(&allocator);
|
|
148
|
+
hb_allocator_dealloc(&malloc_allocator, source);
|
|
125
149
|
|
|
126
150
|
return EXIT_SUCCESS;
|
|
127
151
|
}
|
|
@@ -129,14 +153,15 @@ int main(const int argc, char* argv[]) {
|
|
|
129
153
|
if (string_equals(argv[1], "prism")) {
|
|
130
154
|
printf("HTML+ERB File: \n%s\n", source);
|
|
131
155
|
|
|
132
|
-
char* ruby_source = herb_extract(source, HERB_EXTRACT_LANGUAGE_RUBY);
|
|
156
|
+
char* ruby_source = herb_extract(source, HERB_EXTRACT_LANGUAGE_RUBY, &allocator);
|
|
133
157
|
printf("Extracted Ruby: \n%s\n", ruby_source);
|
|
134
158
|
|
|
135
159
|
herb_parse_ruby_to_stdout(ruby_source);
|
|
136
160
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
161
|
+
hb_allocator_dealloc(&allocator, ruby_source);
|
|
162
|
+
hb_buffer_free(&output);
|
|
163
|
+
hb_allocator_destroy(&allocator);
|
|
164
|
+
hb_allocator_dealloc(&malloc_allocator, source);
|
|
140
165
|
|
|
141
166
|
return EXIT_SUCCESS;
|
|
142
167
|
}
|