@herb-tools/node 0.8.10 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/binding.gyp +26 -8
- package/dist/herb-node.cjs +41 -12
- package/dist/herb-node.cjs.map +1 -1
- package/dist/herb-node.esm.js +8 -1
- package/dist/herb-node.esm.js.map +1 -1
- package/dist/types/node-backend.d.ts +3 -1
- package/extension/error_helpers.cpp +395 -73
- package/extension/error_helpers.h +13 -3
- package/extension/extension_helpers.cpp +38 -35
- package/extension/extension_helpers.h +2 -2
- package/extension/herb.cpp +183 -64
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +290 -0
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/analyze/action_view/content_tag.c +70 -0
- package/extension/libherb/analyze/action_view/link_to.c +143 -0
- package/extension/libherb/analyze/action_view/registry.c +60 -0
- package/extension/libherb/analyze/action_view/tag.c +64 -0
- package/extension/libherb/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/analyze/action_view/tag_helpers.c +748 -0
- package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
- package/extension/libherb/analyze/analyze.c +882 -0
- package/extension/libherb/{include → analyze}/analyze.h +14 -4
- package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
- package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/analyze/builders.c +343 -0
- package/extension/libherb/analyze/builders.h +27 -0
- package/extension/libherb/analyze/conditional_elements.c +594 -0
- package/extension/libherb/analyze/conditional_elements.h +9 -0
- package/extension/libherb/analyze/conditional_open_tags.c +640 -0
- package/extension/libherb/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/analyze/control_type.c +250 -0
- package/extension/libherb/analyze/control_type.h +14 -0
- package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +48 -23
- package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/analyze/invalid_structures.c +193 -0
- package/extension/libherb/analyze/invalid_structures.h +11 -0
- package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
- package/extension/libherb/analyze/parse_errors.c +84 -0
- package/extension/libherb/analyze/prism_annotate.c +397 -0
- package/extension/libherb/analyze/prism_annotate.h +16 -0
- package/extension/libherb/{analyze_transform.c → analyze/transform.c} +17 -3
- package/extension/libherb/ast_node.c +17 -7
- package/extension/libherb/ast_node.h +11 -5
- package/extension/libherb/ast_nodes.c +663 -388
- package/extension/libherb/ast_nodes.h +118 -39
- package/extension/libherb/ast_pretty_print.c +191 -7
- package/extension/libherb/ast_pretty_print.h +6 -1
- package/extension/libherb/element_source.h +3 -8
- package/extension/libherb/errors.c +1077 -521
- package/extension/libherb/errors.h +149 -56
- package/extension/libherb/extract.c +145 -49
- package/extension/libherb/extract.h +21 -5
- package/extension/libherb/herb.c +52 -34
- package/extension/libherb/herb.h +18 -6
- package/extension/libherb/herb_prism_node.h +13 -0
- package/extension/libherb/html_util.c +241 -12
- package/extension/libherb/html_util.h +7 -2
- package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
- package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/include/analyze/builders.h +27 -0
- package/extension/libherb/include/analyze/conditional_elements.h +9 -0
- package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/include/analyze/control_type.h +14 -0
- package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/include/analyze/invalid_structures.h +11 -0
- package/extension/libherb/include/analyze/prism_annotate.h +16 -0
- package/extension/libherb/include/ast_node.h +11 -5
- package/extension/libherb/include/ast_nodes.h +118 -39
- package/extension/libherb/include/ast_pretty_print.h +6 -1
- package/extension/libherb/include/element_source.h +3 -8
- package/extension/libherb/include/errors.h +149 -56
- package/extension/libherb/include/extract.h +21 -5
- package/extension/libherb/include/herb.h +18 -6
- package/extension/libherb/include/herb_prism_node.h +13 -0
- package/extension/libherb/include/html_util.h +7 -2
- package/extension/libherb/include/io.h +3 -1
- package/extension/libherb/include/lex_helpers.h +29 -0
- package/extension/libherb/include/lexer.h +1 -1
- package/extension/libherb/include/lexer_peek_helpers.h +87 -13
- package/extension/libherb/include/lexer_struct.h +2 -0
- package/extension/libherb/include/location.h +2 -1
- package/extension/libherb/include/parser.h +27 -2
- package/extension/libherb/include/parser_helpers.h +19 -3
- package/extension/libherb/include/pretty_print.h +10 -5
- package/extension/libherb/include/prism_context.h +45 -0
- package/extension/libherb/include/prism_helpers.h +10 -7
- package/extension/libherb/include/prism_serialized.h +12 -0
- package/extension/libherb/include/token.h +16 -4
- package/extension/libherb/include/token_struct.h +10 -3
- package/extension/libherb/include/utf8.h +2 -1
- package/extension/libherb/include/util/hb_allocator.h +78 -0
- package/extension/libherb/include/util/hb_arena.h +6 -1
- package/extension/libherb/include/util/hb_arena_debug.h +12 -1
- package/extension/libherb/include/util/hb_array.h +7 -3
- package/extension/libherb/include/util/hb_buffer.h +6 -4
- package/extension/libherb/include/util/hb_foreach.h +79 -0
- package/extension/libherb/include/util/hb_narray.h +8 -4
- package/extension/libherb/include/util/hb_string.h +56 -9
- package/extension/libherb/include/util.h +6 -3
- package/extension/libherb/include/version.h +1 -1
- package/extension/libherb/io.c +3 -2
- package/extension/libherb/io.h +3 -1
- package/extension/libherb/lex_helpers.h +29 -0
- package/extension/libherb/lexer.c +42 -30
- package/extension/libherb/lexer.h +1 -1
- package/extension/libherb/lexer_peek_helpers.c +12 -74
- package/extension/libherb/lexer_peek_helpers.h +87 -13
- package/extension/libherb/lexer_struct.h +2 -0
- package/extension/libherb/location.c +2 -2
- package/extension/libherb/location.h +2 -1
- package/extension/libherb/main.c +53 -28
- package/extension/libherb/parser.c +783 -247
- package/extension/libherb/parser.h +27 -2
- package/extension/libherb/parser_helpers.c +110 -23
- package/extension/libherb/parser_helpers.h +19 -3
- package/extension/libherb/parser_match_tags.c +110 -49
- package/extension/libherb/pretty_print.c +29 -24
- package/extension/libherb/pretty_print.h +10 -5
- package/extension/libherb/prism_context.h +45 -0
- package/extension/libherb/prism_helpers.c +30 -27
- package/extension/libherb/prism_helpers.h +10 -7
- package/extension/libherb/prism_serialized.h +12 -0
- package/extension/libherb/ruby_parser.c +2 -0
- package/extension/libherb/token.c +151 -66
- package/extension/libherb/token.h +16 -4
- package/extension/libherb/token_matchers.c +0 -1
- package/extension/libherb/token_struct.h +10 -3
- package/extension/libherb/utf8.c +7 -6
- package/extension/libherb/utf8.h +2 -1
- package/extension/libherb/util/hb_allocator.c +341 -0
- package/extension/libherb/util/hb_allocator.h +78 -0
- package/extension/libherb/util/hb_arena.c +81 -56
- package/extension/libherb/util/hb_arena.h +6 -1
- package/extension/libherb/util/hb_arena_debug.c +32 -17
- package/extension/libherb/util/hb_arena_debug.h +12 -1
- package/extension/libherb/util/hb_array.c +30 -15
- package/extension/libherb/util/hb_array.h +7 -3
- package/extension/libherb/util/hb_buffer.c +17 -21
- package/extension/libherb/util/hb_buffer.h +6 -4
- package/extension/libherb/util/hb_foreach.h +79 -0
- package/extension/libherb/util/hb_narray.c +22 -7
- package/extension/libherb/util/hb_narray.h +8 -4
- package/extension/libherb/util/hb_string.c +49 -35
- package/extension/libherb/util/hb_string.h +56 -9
- package/extension/libherb/util.c +21 -11
- package/extension/libherb/util.h +6 -3
- package/extension/libherb/version.h +1 -1
- package/extension/libherb/visitor.c +48 -1
- package/extension/nodes.cpp +451 -6
- package/extension/nodes.h +8 -1
- package/package.json +12 -8
- package/src/node-backend.ts +11 -1
- package/dist/types/index-cjs.d.cts +0 -1
- package/extension/libherb/analyze.c +0 -1608
- package/extension/libherb/element_source.c +0 -12
- package/extension/libherb/include/util/hb_system.h +0 -9
- package/extension/libherb/util/hb_system.c +0 -30
- package/extension/libherb/util/hb_system.h +0 -9
- package/src/index-cjs.cts +0 -22
- /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
- /package/src/{index-esm.mts → index.ts} +0 -0
|
@@ -4,26 +4,73 @@
|
|
|
4
4
|
#include <stdbool.h>
|
|
5
5
|
#include <stddef.h>
|
|
6
6
|
#include <stdint.h>
|
|
7
|
+
#include <string.h>
|
|
8
|
+
#include <strings.h>
|
|
7
9
|
|
|
8
|
-
#include "
|
|
10
|
+
#include "hb_allocator.h"
|
|
11
|
+
#include "hb_foreach.h"
|
|
9
12
|
|
|
10
13
|
typedef struct HB_STRING_STRUCT {
|
|
11
14
|
char* data;
|
|
12
15
|
uint32_t length;
|
|
13
16
|
} hb_string_T;
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
#define HB_STRING_EMPTY ((hb_string_T) { .data = "", .length = 0 })
|
|
19
|
+
#define HB_STRING_NULL ((hb_string_T) { .data = NULL, .length = 0 })
|
|
20
|
+
|
|
21
|
+
#define HB_STRING_LITERAL(string) { .data = (char*) (string), .length = (uint32_t) (sizeof(string) - 1) }
|
|
22
|
+
|
|
23
|
+
#define HB_STRING_LIST(...) { HB_FOR_EACH(HB_STRING_LITERAL, __VA_ARGS__) }
|
|
24
|
+
|
|
25
|
+
#define hb_string(string) \
|
|
26
|
+
(__builtin_constant_p(string) \
|
|
27
|
+
? ((hb_string_T) { .data = (char*) (string), .length = (uint32_t) __builtin_strlen(string) }) \
|
|
28
|
+
: hb_string_from_c_string(string))
|
|
29
|
+
|
|
30
|
+
hb_string_T hb_string_from_c_string(const char* null_terminated_c_string);
|
|
31
|
+
|
|
32
|
+
static inline bool hb_string_is_null(hb_string_T string) {
|
|
33
|
+
return string.data == NULL;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
static inline bool hb_string_is_empty(hb_string_T string) {
|
|
37
|
+
return string.data == NULL || string.length == 0;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
static inline hb_string_T hb_string_slice(hb_string_T string, uint32_t offset) {
|
|
41
|
+
if (string.length < offset) { return HB_STRING_NULL; }
|
|
42
|
+
|
|
43
|
+
return (hb_string_T) { .data = string.data + offset, .length = string.length - offset };
|
|
44
|
+
}
|
|
22
45
|
|
|
46
|
+
static inline bool hb_string_equals(hb_string_T a, hb_string_T b) {
|
|
47
|
+
if (a.length != b.length) { return false; }
|
|
48
|
+
|
|
49
|
+
return strncmp(a.data, b.data, a.length) == 0;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
static inline bool hb_string_equals_case_insensitive(hb_string_T a, hb_string_T b) {
|
|
53
|
+
if (a.length != b.length) { return false; }
|
|
54
|
+
|
|
55
|
+
return strncasecmp(a.data, b.data, a.length) == 0;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
static inline bool hb_string_starts_with(hb_string_T string, hb_string_T expected_prefix) {
|
|
59
|
+
if (hb_string_is_empty(string) || hb_string_is_empty(expected_prefix)) { return false; }
|
|
60
|
+
if (string.length < expected_prefix.length) { return false; }
|
|
61
|
+
|
|
62
|
+
return strncmp(string.data, expected_prefix.data, expected_prefix.length) == 0;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
hb_string_T hb_string_truncate(hb_string_T string, uint32_t max_length);
|
|
23
66
|
hb_string_T hb_string_range(hb_string_T string, uint32_t from, uint32_t to);
|
|
67
|
+
hb_string_T hb_string_trim_start(hb_string_T string);
|
|
68
|
+
hb_string_T hb_string_trim_end(hb_string_T string);
|
|
69
|
+
hb_string_T hb_string_trim(hb_string_T string);
|
|
70
|
+
bool hb_string_is_blank(hb_string_T string);
|
|
71
|
+
hb_string_T hb_string_copy(hb_string_T string, hb_allocator_T* allocator);
|
|
24
72
|
|
|
25
73
|
char* hb_string_to_c_string_using_malloc(hb_string_T string);
|
|
26
|
-
|
|
27
74
|
char* hb_string_to_c_string(hb_arena_T* allocator, hb_string_T string);
|
|
28
75
|
|
|
29
76
|
#endif
|
|
@@ -5,10 +5,13 @@
|
|
|
5
5
|
#include <stdbool.h>
|
|
6
6
|
#include <stdlib.h>
|
|
7
7
|
|
|
8
|
+
struct hb_allocator;
|
|
9
|
+
|
|
8
10
|
int is_newline(int character);
|
|
11
|
+
int is_whitespace(int character);
|
|
12
|
+
hb_string_T escape_newlines(struct hb_allocator* allocator, hb_string_T input);
|
|
13
|
+
hb_string_T quoted_string(struct hb_allocator* allocator, hb_string_T input);
|
|
9
14
|
|
|
10
|
-
|
|
11
|
-
hb_string_T quoted_string(hb_string_T input);
|
|
12
|
-
char* herb_strdup(const char* s);
|
|
15
|
+
char* convert_underscores_to_dashes(const char* input);
|
|
13
16
|
|
|
14
17
|
#endif
|
package/extension/libherb/io.c
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#include "include/io.h"
|
|
2
|
+
#include "include/util/hb_allocator.h"
|
|
2
3
|
#include "include/util/hb_buffer.h"
|
|
3
4
|
|
|
4
5
|
#include <errno.h>
|
|
@@ -7,7 +8,7 @@
|
|
|
7
8
|
|
|
8
9
|
#define FILE_READ_CHUNK 4096
|
|
9
10
|
|
|
10
|
-
char* herb_read_file(const char* filename) {
|
|
11
|
+
char* herb_read_file(const char* filename, struct hb_allocator* allocator) {
|
|
11
12
|
if (!filename) { return NULL; }
|
|
12
13
|
|
|
13
14
|
FILE* fp = fopen(filename, "rb");
|
|
@@ -18,7 +19,7 @@ char* herb_read_file(const char* filename) {
|
|
|
18
19
|
}
|
|
19
20
|
|
|
20
21
|
hb_buffer_T buffer;
|
|
21
|
-
hb_buffer_init(&buffer, 4096);
|
|
22
|
+
hb_buffer_init(&buffer, 4096, allocator);
|
|
22
23
|
|
|
23
24
|
char chunk[FILE_READ_CHUNK];
|
|
24
25
|
size_t bytes_read;
|
package/extension/libherb/io.h
CHANGED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#ifndef HERB_LEX_HELPERS_H
|
|
2
|
+
#define HERB_LEX_HELPERS_H
|
|
3
|
+
|
|
4
|
+
#include "herb.h"
|
|
5
|
+
#include "token.h"
|
|
6
|
+
#include "util/hb_allocator.h"
|
|
7
|
+
#include "util/hb_array.h"
|
|
8
|
+
#include "util/hb_buffer.h"
|
|
9
|
+
#include "util/hb_string.h"
|
|
10
|
+
|
|
11
|
+
#include <stdlib.h>
|
|
12
|
+
|
|
13
|
+
static inline void herb_lex_to_buffer(const char* source, hb_buffer_T* output, hb_allocator_T* allocator) {
|
|
14
|
+
hb_array_T* tokens = herb_lex(source, allocator);
|
|
15
|
+
|
|
16
|
+
for (size_t i = 0; i < hb_array_size(tokens); i++) {
|
|
17
|
+
token_T* token = hb_array_get(tokens, i);
|
|
18
|
+
|
|
19
|
+
hb_string_T type = token_to_string(allocator, token);
|
|
20
|
+
hb_buffer_append_string(output, type);
|
|
21
|
+
hb_allocator_dealloc(allocator, type.data);
|
|
22
|
+
|
|
23
|
+
hb_buffer_append(output, "\n");
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
herb_free_tokens(&tokens, allocator);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
#endif
|
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
#include "include/lexer_peek_helpers.h"
|
|
2
|
+
#include "include/macros.h"
|
|
2
3
|
#include "include/token.h"
|
|
3
4
|
#include "include/utf8.h"
|
|
4
5
|
#include "include/util.h"
|
|
5
|
-
#include "include/util/hb_buffer.h"
|
|
6
6
|
#include "include/util/hb_string.h"
|
|
7
7
|
|
|
8
8
|
#include <ctype.h>
|
|
9
|
+
#include <stdint.h>
|
|
9
10
|
#include <string.h>
|
|
10
11
|
|
|
11
12
|
#define LEXER_STALL_LIMIT 5
|
|
12
13
|
|
|
14
|
+
static hb_string_T erb_open_patterns[] = HB_STRING_LIST("<%==", "<%%=", "<%graphql", "<%=", "<%#", "<%-", "<%%", "<%");
|
|
15
|
+
|
|
13
16
|
static bool lexer_eof(const lexer_T* lexer) {
|
|
14
17
|
return lexer->current_character == '\0' || lexer->stalled;
|
|
15
18
|
}
|
|
@@ -31,11 +34,13 @@ static bool lexer_stalled(lexer_T* lexer) {
|
|
|
31
34
|
return lexer->stalled;
|
|
32
35
|
}
|
|
33
36
|
|
|
34
|
-
void lexer_init(lexer_T* lexer, const char* source) {
|
|
37
|
+
void lexer_init(lexer_T* lexer, const char* source, hb_allocator_T* allocator) {
|
|
38
|
+
lexer->allocator = allocator;
|
|
39
|
+
|
|
35
40
|
if (source != NULL) {
|
|
36
41
|
lexer->source = hb_string(source);
|
|
37
42
|
} else {
|
|
38
|
-
lexer->source =
|
|
43
|
+
lexer->source = HB_STRING_EMPTY;
|
|
39
44
|
}
|
|
40
45
|
|
|
41
46
|
lexer->current_character = lexer->source.data[0];
|
|
@@ -55,11 +60,11 @@ void lexer_init(lexer_T* lexer, const char* source) {
|
|
|
55
60
|
}
|
|
56
61
|
|
|
57
62
|
token_T* lexer_error(lexer_T* lexer, const char* message) {
|
|
58
|
-
char
|
|
63
|
+
char buffer[128];
|
|
59
64
|
|
|
60
65
|
snprintf(
|
|
61
|
-
|
|
62
|
-
sizeof(
|
|
66
|
+
buffer,
|
|
67
|
+
sizeof(buffer),
|
|
63
68
|
"[Lexer] Error: %s (character '%c', line %u, col %u)\n",
|
|
64
69
|
message,
|
|
65
70
|
lexer->current_character,
|
|
@@ -67,7 +72,10 @@ token_T* lexer_error(lexer_T* lexer, const char* message) {
|
|
|
67
72
|
lexer->current_column
|
|
68
73
|
);
|
|
69
74
|
|
|
70
|
-
|
|
75
|
+
size_t length = strlen(buffer);
|
|
76
|
+
char* error_message = hb_allocator_strndup(lexer->allocator, buffer, length);
|
|
77
|
+
|
|
78
|
+
return token_init((hb_string_T) { .data = error_message, .length = (uint32_t) length }, TOKEN_ERROR, lexer);
|
|
71
79
|
}
|
|
72
80
|
|
|
73
81
|
static void lexer_advance(lexer_T* lexer) {
|
|
@@ -79,8 +87,8 @@ static void lexer_advance(lexer_T* lexer) {
|
|
|
79
87
|
}
|
|
80
88
|
}
|
|
81
89
|
|
|
82
|
-
static void lexer_advance_utf8_bytes(lexer_T* lexer,
|
|
83
|
-
if (byte_count
|
|
90
|
+
static void lexer_advance_utf8_bytes(lexer_T* lexer, uint32_t byte_count) {
|
|
91
|
+
if (byte_count == 0) { return; }
|
|
84
92
|
|
|
85
93
|
if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
|
|
86
94
|
if (!is_newline(lexer->current_character)) { lexer->current_column++; }
|
|
@@ -120,19 +128,17 @@ static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type
|
|
|
120
128
|
}
|
|
121
129
|
|
|
122
130
|
static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
|
|
123
|
-
|
|
124
|
-
buffer[0] = lexer->current_character;
|
|
125
|
-
buffer[1] = '\0';
|
|
126
|
-
|
|
127
|
-
return lexer_advance_with(lexer, hb_string(buffer), type);
|
|
131
|
+
return lexer_advance_with_next(lexer, 1, type);
|
|
128
132
|
}
|
|
129
133
|
|
|
130
134
|
static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
|
|
131
|
-
|
|
135
|
+
uint32_t char_byte_length = utf8_sequence_length(hb_string_slice(lexer->source, lexer->current_position));
|
|
136
|
+
|
|
132
137
|
if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
|
|
138
|
+
|
|
133
139
|
uint32_t start_position = lexer->current_position;
|
|
134
140
|
|
|
135
|
-
for (
|
|
141
|
+
for (uint32_t i = 0; i < char_byte_length; i++) {
|
|
136
142
|
if (lexer->current_position + i >= lexer->source.length) { return lexer_advance_current(lexer, type); }
|
|
137
143
|
}
|
|
138
144
|
|
|
@@ -171,7 +177,8 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
|
|
|
171
177
|
|
|
172
178
|
while ((isalnum(lexer->current_character) || lexer->current_character == '-' || lexer->current_character == '_'
|
|
173
179
|
|| lexer->current_character == ':')
|
|
174
|
-
&& !lexer_peek_for_html_comment_end(lexer, 0) && !
|
|
180
|
+
&& !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_peek_for_html_comment_invalid_end(lexer, 0)
|
|
181
|
+
&& !lexer_eof(lexer)) {
|
|
175
182
|
|
|
176
183
|
lexer_advance(lexer);
|
|
177
184
|
}
|
|
@@ -185,13 +192,9 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
|
|
|
185
192
|
// ===== ERB Parsing
|
|
186
193
|
|
|
187
194
|
static token_T* lexer_parse_erb_open(lexer_T* lexer) {
|
|
188
|
-
hb_string_T erb_patterns[] = { hb_string("<%=="), hb_string("<%%="), hb_string("<%="), hb_string("<%#"),
|
|
189
|
-
hb_string("<%-"), hb_string("<%%"), hb_string("<%graphql"), hb_string("<%") };
|
|
190
|
-
|
|
191
195
|
lexer->state = STATE_ERB_CONTENT;
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
token_T* match = lexer_match_and_advance(lexer, erb_patterns[i], TOKEN_ERB_START);
|
|
196
|
+
for (size_t i = 0; i < sizeof(erb_open_patterns) / sizeof(erb_open_patterns[0]); i++) {
|
|
197
|
+
token_T* match = lexer_match_and_advance(lexer, erb_open_patterns[i], TOKEN_ERB_START);
|
|
195
198
|
if (match) { return match; }
|
|
196
199
|
}
|
|
197
200
|
|
|
@@ -203,11 +206,17 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
|
|
|
203
206
|
|
|
204
207
|
while (!lexer_peek_erb_end(lexer, 0)) {
|
|
205
208
|
if (lexer_eof(lexer)) {
|
|
206
|
-
token_T* token =
|
|
207
|
-
hb_string_range(lexer->source, start_position, lexer->current_position),
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
209
|
+
token_T* token =
|
|
210
|
+
token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
|
|
211
|
+
|
|
212
|
+
return token;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
if (lexer_peek_erb_start(lexer, 0)) {
|
|
216
|
+
lexer->state = STATE_DATA;
|
|
217
|
+
|
|
218
|
+
token_T* token =
|
|
219
|
+
token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
|
|
211
220
|
|
|
212
221
|
return token;
|
|
213
222
|
}
|
|
@@ -244,7 +253,7 @@ static token_T* lexer_parse_erb_close(lexer_T* lexer) {
|
|
|
244
253
|
// ===== Tokenizing Function
|
|
245
254
|
|
|
246
255
|
token_T* lexer_next_token(lexer_T* lexer) {
|
|
247
|
-
if (lexer_eof(lexer)) { return token_init(
|
|
256
|
+
if (lexer_eof(lexer)) { return token_init(HB_STRING_EMPTY, TOKEN_EOF, lexer); }
|
|
248
257
|
if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }
|
|
249
258
|
|
|
250
259
|
if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }
|
|
@@ -302,7 +311,10 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
|
302
311
|
}
|
|
303
312
|
|
|
304
313
|
case '-': {
|
|
305
|
-
token_T* token = lexer_match_and_advance(lexer, hb_string("
|
|
314
|
+
token_T* token = lexer_match_and_advance(lexer, hb_string("--!>"), TOKEN_HTML_COMMENT_INVALID_END);
|
|
315
|
+
if (token) { return token; }
|
|
316
|
+
|
|
317
|
+
token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END);
|
|
306
318
|
return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
|
|
307
319
|
}
|
|
308
320
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
#include "lexer_struct.h"
|
|
5
5
|
#include "token_struct.h"
|
|
6
6
|
|
|
7
|
-
void lexer_init(lexer_T* lexer, const char* source);
|
|
7
|
+
void lexer_init(lexer_T* lexer, const char* source, hb_allocator_T* allocator);
|
|
8
8
|
token_T* lexer_next_token(lexer_T* lexer);
|
|
9
9
|
token_T* lexer_error(lexer_T* lexer, const char* message);
|
|
10
10
|
|
|
@@ -1,22 +1,10 @@
|
|
|
1
1
|
#include "include/lexer_peek_helpers.h"
|
|
2
2
|
#include "include/lexer.h"
|
|
3
|
-
#include "include/lexer_struct.h"
|
|
4
|
-
#include "include/macros.h"
|
|
5
3
|
#include "include/token.h"
|
|
6
|
-
#include "include/util/hb_string.h"
|
|
7
4
|
|
|
8
5
|
#include <ctype.h>
|
|
9
|
-
#include <stdbool.h>
|
|
10
6
|
|
|
11
|
-
|
|
12
|
-
return lexer->source.data[MAX(lexer->current_position - offset, 0)];
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
char lexer_peek(const lexer_T* lexer, uint32_t offset) {
|
|
16
|
-
return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)];
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, const bool case_insensitive) {
|
|
7
|
+
static bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, bool case_insensitive) {
|
|
20
8
|
hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position + offset);
|
|
21
9
|
remaining_source.length = MIN(pattern.length, remaining_source.length);
|
|
22
10
|
|
|
@@ -47,31 +35,19 @@ bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset) {
|
|
|
47
35
|
return lexer_peek_for(lexer, offset, hb_string("<!--"), false);
|
|
48
36
|
}
|
|
49
37
|
|
|
50
|
-
bool
|
|
51
|
-
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) {
|
|
55
|
-
return lexer_peek_for(lexer, offset, hb_string("%>"), false);
|
|
56
|
-
}
|
|
38
|
+
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
|
|
39
|
+
if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
|
|
57
40
|
|
|
58
|
-
|
|
59
|
-
return lexer_peek_for(lexer, offset, hb_string("-%>"), false);
|
|
60
|
-
}
|
|
41
|
+
uint32_t position = offset + 2;
|
|
61
42
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
43
|
+
while (lexer_peek(lexer, position) == ' ' || lexer_peek(lexer, position) == '\t'
|
|
44
|
+
|| lexer_peek(lexer, position) == '\n' || lexer_peek(lexer, position) == '\r') {
|
|
45
|
+
position++;
|
|
46
|
+
}
|
|
65
47
|
|
|
66
|
-
|
|
67
|
-
return lexer_peek_for(lexer, offset, hb_string("=%>"), false);
|
|
68
|
-
}
|
|
48
|
+
char character = lexer_peek(lexer, position);
|
|
69
49
|
|
|
70
|
-
|
|
71
|
-
return (
|
|
72
|
-
lexer_peek_erb_close_tag(lexer, offset) || lexer_peek_erb_dash_close_tag(lexer, offset)
|
|
73
|
-
|| lexer_peek_erb_percent_close_tag(lexer, offset) || lexer_peek_erb_equals_close_tag(lexer, offset)
|
|
74
|
-
);
|
|
50
|
+
return isalpha(character) || character == '_';
|
|
75
51
|
}
|
|
76
52
|
|
|
77
53
|
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type) {
|
|
@@ -84,13 +60,13 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
|
|
|
84
60
|
token_T* token = lexer_next_token(lexer);
|
|
85
61
|
|
|
86
62
|
while (token && (token->type == TOKEN_WHITESPACE || token->type == TOKEN_NEWLINE)) {
|
|
87
|
-
token_free(token);
|
|
63
|
+
token_free(token, lexer->allocator);
|
|
88
64
|
token = lexer_next_token(lexer);
|
|
89
65
|
}
|
|
90
66
|
|
|
91
67
|
bool result = (token && token->type == token_type);
|
|
92
68
|
|
|
93
|
-
if (token) { token_free(token); }
|
|
69
|
+
if (token) { token_free(token, lexer->allocator); }
|
|
94
70
|
|
|
95
71
|
lexer->current_position = saved_position;
|
|
96
72
|
lexer->current_line = saved_line;
|
|
@@ -100,41 +76,3 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
|
|
|
100
76
|
|
|
101
77
|
return result;
|
|
102
78
|
}
|
|
103
|
-
|
|
104
|
-
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
|
|
105
|
-
if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
|
|
106
|
-
|
|
107
|
-
uint32_t pos = offset + 2;
|
|
108
|
-
|
|
109
|
-
while (lexer_peek(lexer, pos) == ' ' || lexer_peek(lexer, pos) == '\t' || lexer_peek(lexer, pos) == '\n'
|
|
110
|
-
|| lexer_peek(lexer, pos) == '\r') {
|
|
111
|
-
pos++;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
char c = lexer_peek(lexer, pos);
|
|
115
|
-
|
|
116
|
-
return isalpha(c) || c == '_';
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
lexer_state_snapshot_T lexer_save_state(lexer_T* lexer) {
|
|
120
|
-
lexer_state_snapshot_T snapshot = { .position = lexer->current_position,
|
|
121
|
-
.line = lexer->current_line,
|
|
122
|
-
.column = lexer->current_column,
|
|
123
|
-
.previous_position = lexer->previous_position,
|
|
124
|
-
.previous_line = lexer->previous_line,
|
|
125
|
-
.previous_column = lexer->previous_column,
|
|
126
|
-
.current_character = lexer->current_character,
|
|
127
|
-
.state = lexer->state };
|
|
128
|
-
return snapshot;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot) {
|
|
132
|
-
lexer->current_position = snapshot.position;
|
|
133
|
-
lexer->current_line = snapshot.line;
|
|
134
|
-
lexer->current_column = snapshot.column;
|
|
135
|
-
lexer->previous_position = snapshot.previous_position;
|
|
136
|
-
lexer->previous_line = snapshot.previous_line;
|
|
137
|
-
lexer->previous_column = snapshot.previous_column;
|
|
138
|
-
lexer->current_character = snapshot.current_character;
|
|
139
|
-
lexer->state = snapshot.state;
|
|
140
|
-
}
|
|
@@ -2,8 +2,11 @@
|
|
|
2
2
|
#define HERB_LEXER_PEEK_HELPERS_H
|
|
3
3
|
|
|
4
4
|
#include "lexer_struct.h"
|
|
5
|
+
#include "macros.h"
|
|
5
6
|
#include "token_struct.h"
|
|
7
|
+
#include "util/hb_string.h"
|
|
6
8
|
|
|
9
|
+
#include <ctype.h>
|
|
7
10
|
#include <stdbool.h>
|
|
8
11
|
#include <stdint.h>
|
|
9
12
|
#include <stdio.h>
|
|
@@ -20,27 +23,98 @@ typedef struct {
|
|
|
20
23
|
lexer_state_T state;
|
|
21
24
|
} lexer_state_snapshot_T;
|
|
22
25
|
|
|
23
|
-
char lexer_peek(const lexer_T* lexer, uint32_t offset);
|
|
24
26
|
bool lexer_peek_for_doctype(const lexer_T* lexer, uint32_t offset);
|
|
25
27
|
bool lexer_peek_for_xml_declaration(const lexer_T* lexer, uint32_t offset);
|
|
26
28
|
bool lexer_peek_for_cdata_start(const lexer_T* lexer, uint32_t offset);
|
|
27
29
|
bool lexer_peek_for_cdata_end(const lexer_T* lexer, uint32_t offset);
|
|
28
|
-
|
|
29
30
|
bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset);
|
|
30
|
-
bool
|
|
31
|
+
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type);
|
|
32
|
+
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset);
|
|
31
33
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset);
|
|
36
|
-
bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset);
|
|
34
|
+
static inline char lexer_peek(const lexer_T* lexer, uint32_t offset) {
|
|
35
|
+
return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)];
|
|
36
|
+
}
|
|
37
37
|
|
|
38
|
-
char lexer_backtrack(const lexer_T* lexer, uint32_t offset)
|
|
38
|
+
static inline char lexer_backtrack(const lexer_T* lexer, uint32_t offset) {
|
|
39
|
+
return lexer->source.data[MAX(lexer->current_position - offset, 0)];
|
|
40
|
+
}
|
|
39
41
|
|
|
40
|
-
bool
|
|
41
|
-
|
|
42
|
+
static inline bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset) {
|
|
43
|
+
uint32_t position = lexer->current_position + offset;
|
|
44
|
+
|
|
45
|
+
return position + 2 < lexer->source.length && lexer->source.data[position] == '-'
|
|
46
|
+
&& lexer->source.data[position + 1] == '-' && lexer->source.data[position + 2] == '>';
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
static inline bool lexer_peek_for_html_comment_invalid_end(const lexer_T* lexer, uint32_t offset) {
|
|
50
|
+
uint32_t position = lexer->current_position + offset;
|
|
51
|
+
|
|
52
|
+
return position + 3 < lexer->source.length && lexer->source.data[position] == '-'
|
|
53
|
+
&& lexer->source.data[position + 1] == '-' && lexer->source.data[position + 2] == '!'
|
|
54
|
+
&& lexer->source.data[position + 3] == '>';
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
static inline bool lexer_peek_erb_start(const lexer_T* lexer, uint32_t offset) {
|
|
58
|
+
uint32_t position = lexer->current_position + offset;
|
|
59
|
+
|
|
60
|
+
return position + 1 < lexer->source.length && lexer->source.data[position] == '<'
|
|
61
|
+
&& lexer->source.data[position + 1] == '%';
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
static inline bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) {
|
|
65
|
+
uint32_t position = lexer->current_position + offset;
|
|
66
|
+
|
|
67
|
+
return position + 1 < lexer->source.length && lexer->source.data[position] == '%'
|
|
68
|
+
&& lexer->source.data[position + 1] == '>';
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
static inline bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset) {
|
|
72
|
+
uint32_t position = lexer->current_position + offset;
|
|
73
|
+
|
|
74
|
+
return position + 2 < lexer->source.length && lexer->source.data[position] == '-'
|
|
75
|
+
&& lexer->source.data[position + 1] == '%' && lexer->source.data[position + 2] == '>';
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
static inline bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset) {
|
|
79
|
+
uint32_t position = lexer->current_position + offset;
|
|
80
|
+
|
|
81
|
+
return position + 2 < lexer->source.length && lexer->source.data[position] == '%'
|
|
82
|
+
&& lexer->source.data[position + 1] == '%' && lexer->source.data[position + 2] == '>';
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
static inline bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset) {
|
|
86
|
+
uint32_t position = lexer->current_position + offset;
|
|
87
|
+
|
|
88
|
+
return position + 2 < lexer->source.length && lexer->source.data[position] == '='
|
|
89
|
+
&& lexer->source.data[position + 1] == '%' && lexer->source.data[position + 2] == '>';
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Reports whether any of the ERB closing checks matches `offset` positions
// ahead of the current position. The checks are tried in this order, stopping
// at the first hit: plain close tag, dash close tag, percent close tag,
// equals close tag.
static inline bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset) {
  if (lexer_peek_erb_close_tag(lexer, offset)) { return true; }
  if (lexer_peek_erb_dash_close_tag(lexer, offset)) { return true; }
  if (lexer_peek_erb_percent_close_tag(lexer, offset)) { return true; }

  return lexer_peek_erb_equals_close_tag(lexer, offset);
}
|
|
96
|
+
|
|
97
|
+
// Copies the lexer's current and previous position/line/column, its current
// character and its state into a snapshot value and returns it by value.
// The lexer itself is not modified. Any snapshot member not listed below is
// zero-initialised by the designated initializer.
static inline lexer_state_snapshot_T lexer_save_state(lexer_T* lexer) {
  return (lexer_state_snapshot_T) {
    .position = lexer->current_position,
    .line = lexer->current_line,
    .column = lexer->current_column,
    .previous_position = lexer->previous_position,
    .previous_line = lexer->previous_line,
    .previous_column = lexer->previous_column,
    .current_character = lexer->current_character,
    .state = lexer->state,
  };
}
|
|
42
108
|
|
|
43
|
-
|
|
44
|
-
|
|
109
|
+
// Writes every field captured in `snapshot` back into the lexer: the current
// and previous position/line/column, the current character and the state.
// Other lexer fields are left untouched.
static inline void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot) {
  // Lexer mode and the cached character.
  lexer->state = snapshot.state;
  lexer->current_character = snapshot.current_character;

  // Location of the previous step.
  lexer->previous_position = snapshot.previous_position;
  lexer->previous_line = snapshot.previous_line;
  lexer->previous_column = snapshot.previous_column;

  // Location of the current step.
  lexer->current_position = snapshot.position;
  lexer->current_line = snapshot.line;
  lexer->current_column = snapshot.column;
}
|
|
45
119
|
|
|
46
120
|
#endif
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#ifndef HERB_LEXER_STRUCT_H
|
|
2
2
|
#define HERB_LEXER_STRUCT_H
|
|
3
3
|
|
|
4
|
+
#include "util/hb_allocator.h"
|
|
4
5
|
#include "util/hb_string.h"
|
|
5
6
|
|
|
6
7
|
#include <stdbool.h>
|
|
@@ -14,6 +15,7 @@ typedef enum {
|
|
|
14
15
|
} lexer_state_T;
|
|
15
16
|
|
|
16
17
|
typedef struct LEXER_STRUCT {
|
|
18
|
+
hb_allocator_T* allocator;
|
|
17
19
|
hb_string_T source;
|
|
18
20
|
|
|
19
21
|
uint32_t current_line;
|
|
@@ -17,8 +17,8 @@ void location_from_positions(location_T* location, position_T start, position_T
|
|
|
17
17
|
location->end = end;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
location_T* location_create(position_T start, position_T end) {
|
|
21
|
-
location_T* location =
|
|
20
|
+
location_T* location_create(position_T start, position_T end, hb_allocator_T* allocator) {
|
|
21
|
+
location_T* location = hb_allocator_alloc(allocator, sizeof(location_T));
|
|
22
22
|
|
|
23
23
|
if (location != NULL) {
|
|
24
24
|
location->start = start;
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
#include <stdlib.h>
|
|
6
6
|
|
|
7
7
|
#include "position.h"
|
|
8
|
+
#include "util/hb_allocator.h"
|
|
8
9
|
|
|
9
10
|
typedef struct LOCATION_STRUCT {
|
|
10
11
|
position_T start;
|
|
@@ -21,6 +22,6 @@ void location_from(
|
|
|
21
22
|
|
|
22
23
|
void location_from_positions(location_T* location, position_T start, position_T end);
|
|
23
24
|
|
|
24
|
-
location_T* location_create(position_T start, position_T end);
|
|
25
|
+
location_T* location_create(position_T start, position_T end, hb_allocator_T* allocator);
|
|
25
26
|
|
|
26
27
|
#endif
|