herb 0.4.1-x86_64-linux-gnu → 0.4.3-x86_64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/ext/herb/error_helpers.c +1 -1
- data/ext/herb/error_helpers.h +1 -1
- data/ext/herb/nodes.c +2 -2
- data/ext/herb/nodes.h +1 -1
- data/lib/herb/3.0/herb.so +0 -0
- data/lib/herb/3.1/herb.so +0 -0
- data/lib/herb/3.2/herb.so +0 -0
- data/lib/herb/3.3/herb.so +0 -0
- data/lib/herb/3.4/herb.so +0 -0
- data/lib/herb/ast/nodes.rb +1 -1
- data/lib/herb/cli.rb +2 -2
- data/lib/herb/errors.rb +1 -1
- data/lib/herb/project.rb +2 -0
- data/lib/herb/version.rb +1 -1
- data/lib/herb/visitor.rb +1 -1
- data/sig/serialized_ast_errors.rbs +1 -1
- data/sig/serialized_ast_nodes.rbs +1 -1
- data/src/analyze.c +20 -5
- data/src/analyze_helpers.c +6 -2
- data/src/ast_nodes.c +1 -1
- data/src/ast_pretty_print.c +1 -1
- data/src/errors.c +1 -1
- data/src/include/analyze_helpers.h +1 -1
- data/src/include/ast_nodes.h +1 -1
- data/src/include/ast_pretty_print.h +1 -1
- data/src/include/errors.h +1 -1
- data/src/include/utf8.h +11 -0
- data/src/include/version.h +1 -1
- data/src/lexer.c +49 -2
- data/src/parser.c +103 -7
- data/src/utf8.c +46 -0
- data/src/visitor.c +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c738e73ddaeddb08aa35b3ecd9127acdec79adae5a2545e0795e174e1a4c9af
|
4
|
+
data.tar.gz: 45d7300bbd0f84b0c121c9289de7a80587fa93580f0d2babf8a64246a68492a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de86bfc92f1c123967c89f62abbe837d4ccd3fdf8b88088f568c36a9945f870a9a984aa99f94dba6b3c56a74b773b3ce5c509d833d517cf0a5b1ca4290088e5e
|
7
|
+
data.tar.gz: 6bdeb2827459ec5f31246e24fc0bc3d7e30ab3286a56c058cac668c3890bea0d816865104d55ab5528a914751f87692a78b333ad43351d69d74e1566164356a7
|
data/README.md
CHANGED
@@ -43,11 +43,11 @@ You can use Herb programmatically in **Ruby**, as well as in **JavaScript** via
|
|
43
43
|
|
44
44
|
For a complete overview of all available tools, libraries, and integrations, visit the [**Projects page**](https://herb-tools.dev/projects) on our documentation site.
|
45
45
|
|
46
|
-
##
|
46
|
+
## Motivation
|
47
47
|
|
48
48
|
HTML+ERB templates never really had good, accurate, and reliable tooling. While developer tooling for Ruby code improved significantly in the last few years (especially with the introduction of the new Prism parser), HTML+ERB files remained underserved, lacking fundamental support like syntax checking, auto-formatting, linting, and structural understanding.
|
49
49
|
|
50
|
-
At the same time, with the rise of tools like [Hotwire](https://hotwired.dev), [Stimulus](https://stimulus.hotwired.dev), [Turbo](https://turbo.hotwired.dev), [HTMX](https://htmx.org), [
|
50
|
+
At the same time, with the rise of tools like [Hotwire](https://hotwired.dev), [Stimulus](https://stimulus.hotwired.dev), [Turbo](https://turbo.hotwired.dev), [HTMX](https://htmx.org), [Unpoly](https://unpoly.com), and [Alpine.js](https://alpinejs.dev), advanced HTML templating became increasingly relevant (again). Developers expect modern, reliable, and precise tooling, especially given the robust ecosystem available to JavaScript frameworks and libraries.
|
51
51
|
|
52
52
|
Herb was built to close this tooling gap, providing proper tooling for HTML+ERB that matches what modern developers expect in the age of language servers, LLMs, and AI-driven workflows.
|
53
53
|
|
data/ext/herb/error_helpers.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/error_helpers.c.erb
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
data/ext/herb/error_helpers.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/error_helpers.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_EXTENSION_ERROR_HELPERS_H
|
5
5
|
#define HERB_EXTENSION_ERROR_HELPERS_H
|
data/ext/herb/nodes.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/nodes.c.erb
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
@@ -415,7 +415,7 @@ static VALUE rb_erb_content_node_from_c_struct(AST_ERB_CONTENT_NODE_T* erb_conte
|
|
415
415
|
VALUE erb_content_node_tag_opening = rb_token_from_c_struct(erb_content_node->tag_opening);
|
416
416
|
VALUE erb_content_node_content = rb_token_from_c_struct(erb_content_node->content);
|
417
417
|
VALUE erb_content_node_tag_closing = rb_token_from_c_struct(erb_content_node->tag_closing);
|
418
|
-
/* #<Herb::Template::AnalyzedRubyField:
|
418
|
+
/* #<Herb::Template::AnalyzedRubyField:0x00007fffe335db40 @name="analyzed_ruby", @options={kind: nil}> */
|
419
419
|
VALUE erb_content_node_analyzed_ruby = Qnil;
|
420
420
|
VALUE erb_content_node_parsed = (erb_content_node->parsed) ? Qtrue : Qfalse;
|
421
421
|
VALUE erb_content_node_valid = (erb_content_node->valid) ? Qtrue : Qfalse;
|
data/ext/herb/nodes.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/nodes.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_EXTENSION_NODES_H
|
5
5
|
#define HERB_EXTENSION_NODES_H
|
data/lib/herb/3.0/herb.so
CHANGED
Binary file
|
data/lib/herb/3.1/herb.so
CHANGED
Binary file
|
data/lib/herb/3.2/herb.so
CHANGED
Binary file
|
data/lib/herb/3.3/herb.so
CHANGED
Binary file
|
data/lib/herb/3.4/herb.so
CHANGED
Binary file
|
data/lib/herb/ast/nodes.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/ast/nodes.rb.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
module AST
|
data/lib/herb/cli.rb
CHANGED
@@ -110,8 +110,8 @@ class Herb::CLI
|
|
110
110
|
project.no_interactive = no_interactive
|
111
111
|
project.no_log_file = no_log_file
|
112
112
|
project.no_timing = no_timing
|
113
|
-
project.parse!
|
114
|
-
exit(0)
|
113
|
+
has_issues = project.parse!
|
114
|
+
exit(has_issues ? 1 : 0)
|
115
115
|
when "parse"
|
116
116
|
Herb.parse(file_content)
|
117
117
|
when "lex"
|
data/lib/herb/errors.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/errors.rb.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
module Errors
|
data/lib/herb/project.rb
CHANGED
data/lib/herb/version.rb
CHANGED
data/lib/herb/visitor.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/visitor.rb.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
class Visitor
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/sig/serialized_ast_errors.rbs.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
type serialized_unexpected_error = serialized_error & {
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/sig/serialized_ast_nodes.rbs.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
type serialized_document_node = serialized_node & {
|
data/src/analyze.c
CHANGED
@@ -39,7 +39,7 @@ static analyzed_ruby_T* herb_analyze_ruby(char* source) {
|
|
39
39
|
search_in_nodes(analyzed);
|
40
40
|
search_rescue_nodes(analyzed);
|
41
41
|
search_ensure_nodes(analyzed);
|
42
|
-
search_yield_nodes(analyzed);
|
42
|
+
search_yield_nodes(analyzed->root, analyzed);
|
43
43
|
search_block_closing_nodes(analyzed);
|
44
44
|
|
45
45
|
return analyzed;
|
@@ -95,8 +95,13 @@ static control_type_t detect_control_type(AST_ERB_CONTENT_NODE_T* erb_node) {
|
|
95
95
|
|
96
96
|
if (!ruby) { return CONTROL_TYPE_UNKNOWN; }
|
97
97
|
|
98
|
-
if (ruby->valid) {
|
98
|
+
if (ruby->valid) {
|
99
|
+
if (has_yield_node(ruby)) { return CONTROL_TYPE_YIELD; }
|
100
|
+
return CONTROL_TYPE_UNKNOWN;
|
101
|
+
}
|
99
102
|
|
103
|
+
if (has_yield_node(ruby)) { return CONTROL_TYPE_YIELD; }
|
104
|
+
if (has_block_node(ruby)) { return CONTROL_TYPE_BLOCK; }
|
100
105
|
if (has_if_node(ruby)) { return CONTROL_TYPE_IF; }
|
101
106
|
if (has_elsif_node(ruby)) { return CONTROL_TYPE_ELSIF; }
|
102
107
|
if (has_else_node(ruby)) { return CONTROL_TYPE_ELSE; }
|
@@ -112,8 +117,6 @@ static control_type_t detect_control_type(AST_ERB_CONTENT_NODE_T* erb_node) {
|
|
112
117
|
if (has_while_node(ruby)) { return CONTROL_TYPE_WHILE; }
|
113
118
|
if (has_until_node(ruby)) { return CONTROL_TYPE_UNTIL; }
|
114
119
|
if (has_for_node(ruby)) { return CONTROL_TYPE_FOR; }
|
115
|
-
if (has_block_node(ruby)) { return CONTROL_TYPE_BLOCK; }
|
116
|
-
if (has_yield_node(ruby)) { return CONTROL_TYPE_YIELD; }
|
117
120
|
if (has_block_closing(ruby)) { return CONTROL_TYPE_BLOCK_CLOSE; }
|
118
121
|
|
119
122
|
return CONTROL_TYPE_UNKNOWN;
|
@@ -1020,10 +1023,22 @@ static array_T* rewrite_node_array(AST_NODE_T* node, array_T* array, analyze_rub
|
|
1020
1023
|
case CONTROL_TYPE_UNTIL:
|
1021
1024
|
case CONTROL_TYPE_FOR:
|
1022
1025
|
case CONTROL_TYPE_BLOCK:
|
1023
|
-
case CONTROL_TYPE_YIELD:
|
1024
1026
|
index = process_control_structure(node, array, index, new_array, context, type);
|
1025
1027
|
continue;
|
1026
1028
|
|
1029
|
+
case CONTROL_TYPE_YIELD: {
|
1030
|
+
AST_NODE_T* yield_node = create_control_node(erb_node, array_init(8), NULL, NULL, type);
|
1031
|
+
|
1032
|
+
if (yield_node) {
|
1033
|
+
array_append(new_array, yield_node);
|
1034
|
+
} else {
|
1035
|
+
array_append(new_array, item);
|
1036
|
+
}
|
1037
|
+
|
1038
|
+
index++;
|
1039
|
+
break;
|
1040
|
+
}
|
1041
|
+
|
1027
1042
|
default:
|
1028
1043
|
array_append(new_array, item);
|
1029
1044
|
index++;
|
data/src/analyze_helpers.c
CHANGED
@@ -279,10 +279,14 @@ bool search_ensure_nodes(analyzed_ruby_T* analyzed) {
|
|
279
279
|
return false;
|
280
280
|
}
|
281
281
|
|
282
|
-
bool search_yield_nodes(
|
283
|
-
|
282
|
+
bool search_yield_nodes(const pm_node_t* node, void* data) {
|
283
|
+
analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
|
284
|
+
|
285
|
+
if (node->type == PM_YIELD_NODE) {
|
284
286
|
analyzed->has_yield_node = true;
|
285
287
|
return true;
|
288
|
+
} else {
|
289
|
+
pm_visit_child_nodes(node, search_yield_nodes, analyzed);
|
286
290
|
}
|
287
291
|
|
288
292
|
return false;
|
data/src/ast_nodes.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_nodes.c.erb
|
3
3
|
|
4
4
|
#include <stdio.h>
|
5
5
|
#include <stdbool.h>
|
data/src/ast_pretty_print.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_pretty_print.c.erb
|
3
3
|
|
4
4
|
#include "include/ast_node.h"
|
5
5
|
#include "include/ast_nodes.h"
|
data/src/errors.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/errors.c.erb
|
3
3
|
|
4
4
|
#include "include/array.h"
|
5
5
|
#include "include/errors.h"
|
@@ -44,6 +44,6 @@ bool search_when_nodes(analyzed_ruby_T* analyzed);
|
|
44
44
|
bool search_in_nodes(analyzed_ruby_T* analyzed);
|
45
45
|
bool search_rescue_nodes(analyzed_ruby_T* analyzed);
|
46
46
|
bool search_ensure_nodes(analyzed_ruby_T* analyzed);
|
47
|
-
bool search_yield_nodes(
|
47
|
+
bool search_yield_nodes(const pm_node_t* node, void* data);
|
48
48
|
|
49
49
|
#endif
|
data/src/include/ast_nodes.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_nodes.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_AST_NODES_H
|
5
5
|
#define HERB_AST_NODES_H
|
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_pretty_print.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_AST_PRETTY_PRINT_H
|
5
5
|
#define HERB_AST_PRETTY_PRINT_H
|
data/src/include/errors.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/errors.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_ERRORS_H
|
5
5
|
#define HERB_ERRORS_H
|
data/src/include/utf8.h
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef HERB_UTF8_H
|
2
|
+
#define HERB_UTF8_H
|
3
|
+
|
4
|
+
#include <stdbool.h>
|
5
|
+
#include <stdlib.h>
|
6
|
+
|
7
|
+
int utf8_char_byte_length(unsigned char first_byte);
|
8
|
+
int utf8_sequence_length(const char* str, size_t position, size_t max_length);
|
9
|
+
bool utf8_is_valid_continuation_byte(unsigned char byte);
|
10
|
+
|
11
|
+
#endif
|
data/src/include/version.h
CHANGED
data/src/lexer.c
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "include/buffer.h"
|
2
2
|
#include "include/lexer_peek_helpers.h"
|
3
3
|
#include "include/token.h"
|
4
|
+
#include "include/utf8.h"
|
4
5
|
#include "include/util.h"
|
5
6
|
|
6
7
|
#include <ctype.h>
|
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
|
|
84
85
|
}
|
85
86
|
}
|
86
87
|
|
88
|
+
static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
|
89
|
+
if (byte_count <= 0) { return; }
|
90
|
+
|
91
|
+
if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
|
92
|
+
if (!is_newline(lexer->current_character)) { lexer->current_column++; }
|
93
|
+
|
94
|
+
lexer->current_position += byte_count;
|
95
|
+
|
96
|
+
if (lexer->current_position >= lexer->source_length) {
|
97
|
+
lexer->current_position = lexer->source_length;
|
98
|
+
lexer->current_character = '\0';
|
99
|
+
} else {
|
100
|
+
lexer->current_character = lexer->source[lexer->current_position];
|
101
|
+
}
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
87
105
|
static void lexer_advance_by(lexer_T* lexer, const size_t count) {
|
88
106
|
for (size_t i = 0; i < count; i++) {
|
89
107
|
lexer_advance(lexer);
|
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
|
|
116
134
|
return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
|
117
135
|
}
|
118
136
|
|
137
|
+
static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
|
138
|
+
int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
|
139
|
+
|
140
|
+
if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
|
141
|
+
|
142
|
+
char* utf8_char = malloc(char_byte_length + 1);
|
143
|
+
|
144
|
+
if (!utf8_char) { return lexer_advance_current(lexer, type); }
|
145
|
+
|
146
|
+
for (int i = 0; i < char_byte_length; i++) {
|
147
|
+
if (lexer->current_position + i >= lexer->source_length) {
|
148
|
+
free(utf8_char);
|
149
|
+
return lexer_advance_current(lexer, type);
|
150
|
+
}
|
151
|
+
|
152
|
+
utf8_char[i] = lexer->source[lexer->current_position + i];
|
153
|
+
}
|
154
|
+
|
155
|
+
utf8_char[char_byte_length] = '\0';
|
156
|
+
|
157
|
+
lexer_advance_utf8_bytes(lexer, char_byte_length);
|
158
|
+
|
159
|
+
token_T* token = token_init(utf8_char, type, lexer);
|
160
|
+
|
161
|
+
free(utf8_char);
|
162
|
+
|
163
|
+
return token;
|
164
|
+
}
|
165
|
+
|
119
166
|
static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
|
120
167
|
if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
|
121
168
|
return lexer_advance_with(lexer, value, type);
|
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
232
279
|
if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
|
233
280
|
|
234
281
|
if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
|
235
|
-
return
|
282
|
+
return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
|
236
283
|
}
|
237
284
|
|
238
285
|
switch (lexer->current_character) {
|
@@ -282,7 +329,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
282
329
|
default: {
|
283
330
|
if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
|
284
331
|
|
285
|
-
return
|
332
|
+
return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
|
286
333
|
}
|
287
334
|
}
|
288
335
|
}
|
data/src/parser.c
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
#include "include/parser_helpers.h"
|
10
10
|
#include "include/token.h"
|
11
11
|
#include "include/token_matchers.h"
|
12
|
+
#include "include/util.h"
|
12
13
|
|
13
14
|
#include <stdio.h>
|
14
15
|
#include <stdlib.h>
|
@@ -184,14 +185,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
|
|
184
185
|
|
185
186
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
186
187
|
array_T* errors = array_init(8);
|
187
|
-
token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
|
188
188
|
|
189
|
-
|
189
|
+
token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
|
190
|
+
token_T* first_token = NULL;
|
191
|
+
|
192
|
+
if (at_token != NULL) {
|
193
|
+
first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
|
194
|
+
|
195
|
+
if (first_token == NULL) {
|
196
|
+
parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
|
197
|
+
|
198
|
+
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
199
|
+
ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
|
200
|
+
|
201
|
+
token_free(at_token);
|
202
|
+
|
203
|
+
return attribute_name;
|
204
|
+
}
|
205
|
+
} else {
|
206
|
+
first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
|
207
|
+
|
208
|
+
if (first_token == NULL) {
|
209
|
+
parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
|
210
|
+
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
|
211
|
+
return attribute_name;
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
buffer_T name_buffer = buffer_new();
|
216
|
+
|
217
|
+
position_T* start_position;
|
218
|
+
|
219
|
+
if (at_token != NULL) {
|
220
|
+
buffer_append(&name_buffer, at_token->value);
|
221
|
+
start_position = position_copy(at_token->location->start);
|
222
|
+
} else {
|
223
|
+
start_position = position_copy(first_token->location->start);
|
224
|
+
}
|
225
|
+
|
226
|
+
buffer_append(&name_buffer, first_token->value);
|
227
|
+
|
228
|
+
position_T* end_position = position_copy(first_token->location->end);
|
229
|
+
size_t range_end = first_token->range->to;
|
230
|
+
|
231
|
+
while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
|
232
|
+
&& strcmp(parser->current_token->value, ".") == 0) {
|
233
|
+
|
234
|
+
token_T* dot_token = parser_advance(parser);
|
235
|
+
|
236
|
+
buffer_append(&name_buffer, dot_token->value);
|
237
|
+
position_free(end_position);
|
238
|
+
|
239
|
+
end_position = position_copy(dot_token->location->end);
|
240
|
+
range_end = dot_token->range->to;
|
241
|
+
|
242
|
+
token_free(dot_token);
|
243
|
+
|
244
|
+
if (parser->current_token->type == TOKEN_IDENTIFIER) {
|
245
|
+
token_T* next_identifier = parser_advance(parser);
|
246
|
+
|
247
|
+
buffer_append(&name_buffer, next_identifier->value);
|
248
|
+
position_free(end_position);
|
249
|
+
|
250
|
+
end_position = position_copy(next_identifier->location->end);
|
251
|
+
range_end = next_identifier->range->to;
|
252
|
+
token_free(next_identifier);
|
253
|
+
} else {
|
254
|
+
break;
|
255
|
+
}
|
256
|
+
}
|
257
|
+
|
258
|
+
token_T* combined_token = calloc(1, sizeof(token_T));
|
259
|
+
combined_token->value = herb_strdup(name_buffer.value);
|
260
|
+
combined_token->type = TOKEN_IDENTIFIER;
|
261
|
+
combined_token->location =
|
262
|
+
location_from(start_position->line, start_position->column, end_position->line, end_position->column);
|
263
|
+
|
264
|
+
size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
|
265
|
+
combined_token->range = range_init(range_start, range_end);
|
190
266
|
|
191
267
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
192
|
-
ast_html_attribute_name_node_init(
|
268
|
+
ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
|
193
269
|
|
194
|
-
|
270
|
+
buffer_free(&name_buffer);
|
271
|
+
position_free(start_position);
|
272
|
+
position_free(end_position);
|
273
|
+
token_free(first_token);
|
274
|
+
|
275
|
+
if (at_token != NULL) { token_free(at_token); }
|
276
|
+
|
277
|
+
token_free(combined_token);
|
195
278
|
|
196
279
|
return attribute_name;
|
197
280
|
}
|
@@ -390,10 +473,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
390
473
|
continue;
|
391
474
|
}
|
392
475
|
|
476
|
+
if (parser->current_token->type == TOKEN_AT) {
|
477
|
+
array_append(children, parser_parse_html_attribute(parser));
|
478
|
+
continue;
|
479
|
+
}
|
480
|
+
|
393
481
|
parser_append_unexpected_error(
|
394
482
|
parser,
|
395
483
|
"Unexpected Token",
|
396
|
-
"TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
|
484
|
+
"TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
|
397
485
|
errors
|
398
486
|
);
|
399
487
|
}
|
@@ -441,6 +529,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
441
529
|
|
442
530
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
443
531
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
532
|
+
|
533
|
+
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
534
|
+
token_T* whitespace = parser_advance(parser);
|
535
|
+
token_free(whitespace);
|
536
|
+
}
|
537
|
+
|
444
538
|
token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
|
445
539
|
|
446
540
|
if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
|
@@ -618,12 +712,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
618
712
|
if (token_is_any_of(
|
619
713
|
parser,
|
620
714
|
TOKEN_AMPERSAND,
|
715
|
+
TOKEN_AT,
|
621
716
|
TOKEN_CHARACTER,
|
622
717
|
TOKEN_COLON,
|
623
718
|
TOKEN_DASH,
|
624
719
|
TOKEN_EQUALS,
|
625
720
|
TOKEN_EXCLAMATION,
|
626
721
|
TOKEN_IDENTIFIER,
|
722
|
+
TOKEN_NBSP,
|
627
723
|
TOKEN_NEWLINE,
|
628
724
|
TOKEN_PERCENT,
|
629
725
|
TOKEN_QUOTE,
|
@@ -639,8 +735,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
639
735
|
parser_append_unexpected_error(
|
640
736
|
parser,
|
641
737
|
"Unexpected token",
|
642
|
-
"TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE,
|
643
|
-
"TOKEN_NEWLINE",
|
738
|
+
"TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
|
739
|
+
"TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
|
644
740
|
errors
|
645
741
|
);
|
646
742
|
}
|
data/src/utf8.c
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#include "include/utf8.h"
|
2
|
+
|
3
|
+
// UTF-8 byte patterns:
|
4
|
+
// 0xxxxxxx = 1 byte (ASCII)
|
5
|
+
// 110xxxxx = 2 bytes
|
6
|
+
// 1110xxxx = 3 bytes
|
7
|
+
// 11110xxx = 4 bytes
|
8
|
+
int utf8_char_byte_length(unsigned char first_byte) {
|
9
|
+
if ((first_byte & 0x80) == 0) {
|
10
|
+
return 1;
|
11
|
+
} else if ((first_byte & 0xE0) == 0xC0) {
|
12
|
+
return 2;
|
13
|
+
} else if ((first_byte & 0xF0) == 0xE0) {
|
14
|
+
return 3;
|
15
|
+
} else if ((first_byte & 0xF8) == 0xF0) {
|
16
|
+
return 4;
|
17
|
+
}
|
18
|
+
|
19
|
+
return 1;
|
20
|
+
}
|
21
|
+
|
22
|
+
// Continuation bytes have pattern 10xxxxxx
|
23
|
+
bool utf8_is_valid_continuation_byte(unsigned char byte) {
|
24
|
+
return (byte & 0xC0) == 0x80;
|
25
|
+
}
|
26
|
+
|
27
|
+
int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
|
28
|
+
if (position >= max_length) { return 0; }
|
29
|
+
|
30
|
+
unsigned char first_byte = (unsigned char) str[position];
|
31
|
+
int expected_length = utf8_char_byte_length(first_byte);
|
32
|
+
|
33
|
+
if (position + expected_length > max_length) {
|
34
|
+
return 1; // Not enough bytes, treat as single byte
|
35
|
+
}
|
36
|
+
|
37
|
+
if (expected_length > 1) {
|
38
|
+
for (int i = 1; i < expected_length; i++) {
|
39
|
+
if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
|
40
|
+
return 1; // Invalid continuation byte, treat first byte as single byte
|
41
|
+
}
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
return expected_length;
|
46
|
+
}
|
data/src/visitor.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/visitor.c.erb
|
3
3
|
|
4
4
|
#include <stdio.h>
|
5
5
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: herb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.3
|
5
5
|
platform: x86_64-linux-gnu
|
6
6
|
authors:
|
7
7
|
- Marco Roth
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-08-03 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: Powerful and seamless HTML-aware ERB parsing and tooling.
|
13
13
|
email:
|
@@ -125,6 +125,7 @@ files:
|
|
125
125
|
- src/include/token.h
|
126
126
|
- src/include/token_matchers.h
|
127
127
|
- src/include/token_struct.h
|
128
|
+
- src/include/utf8.h
|
128
129
|
- src/include/util.h
|
129
130
|
- src/include/version.h
|
130
131
|
- src/include/visitor.h
|
@@ -144,6 +145,7 @@ files:
|
|
144
145
|
- src/ruby_parser.c
|
145
146
|
- src/token.c
|
146
147
|
- src/token_matchers.c
|
148
|
+
- src/utf8.c
|
147
149
|
- src/util.c
|
148
150
|
- src/visitor.c
|
149
151
|
homepage: https://herb-tools.dev
|