herb 0.4.2-x86-linux-gnu → 0.4.3-x86-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/herb/error_helpers.c +1 -1
- data/ext/herb/error_helpers.h +1 -1
- data/ext/herb/nodes.c +2 -2
- data/ext/herb/nodes.h +1 -1
- data/lib/herb/3.0/herb.so +0 -0
- data/lib/herb/3.1/herb.so +0 -0
- data/lib/herb/3.2/herb.so +0 -0
- data/lib/herb/3.3/herb.so +0 -0
- data/lib/herb/3.4/herb.so +0 -0
- data/lib/herb/ast/nodes.rb +1 -1
- data/lib/herb/cli.rb +2 -2
- data/lib/herb/errors.rb +1 -1
- data/lib/herb/project.rb +2 -0
- data/lib/herb/version.rb +1 -1
- data/lib/herb/visitor.rb +1 -1
- data/sig/serialized_ast_errors.rbs +1 -1
- data/sig/serialized_ast_nodes.rbs +1 -1
- data/src/ast_nodes.c +1 -1
- data/src/ast_pretty_print.c +1 -1
- data/src/errors.c +1 -1
- data/src/include/ast_nodes.h +1 -1
- data/src/include/ast_pretty_print.h +1 -1
- data/src/include/errors.h +1 -1
- data/src/include/utf8.h +11 -0
- data/src/include/version.h +1 -1
- data/src/lexer.c +49 -2
- data/src/parser.c +103 -7
- data/src/utf8.c +46 -0
- data/src/visitor.c +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '009cf4ac55e81c9429b44878b496574b65da6bff0efa228ef03ab965af34899c'
|
4
|
+
data.tar.gz: a8837d08cb10343e5d36f83412bb5e46a060cb086ea5c9979309db2038809349
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 554c5aaa7140798607a22ac6b3e9397923eb6ae64e8b6d2c3c4642a88503bf9dff34ea1526370f1d2136777c1ed799737a23bf69741809ddbd2c764d91b43390
|
7
|
+
data.tar.gz: '08bb02fd7137576fd4a8daed894a1b83fe4f7c15a5277785f76cc14fc0c1753f86ef22af69f0f5feb7e3177e245a3afbe483ad1203427c288f3d7c7fa20ede9a'
|
data/ext/herb/error_helpers.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/error_helpers.c.erb
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
data/ext/herb/error_helpers.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/error_helpers.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_EXTENSION_ERROR_HELPERS_H
|
5
5
|
#define HERB_EXTENSION_ERROR_HELPERS_H
|
data/ext/herb/nodes.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/nodes.c.erb
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
@@ -415,7 +415,7 @@ static VALUE rb_erb_content_node_from_c_struct(AST_ERB_CONTENT_NODE_T* erb_conte
|
|
415
415
|
VALUE erb_content_node_tag_opening = rb_token_from_c_struct(erb_content_node->tag_opening);
|
416
416
|
VALUE erb_content_node_content = rb_token_from_c_struct(erb_content_node->content);
|
417
417
|
VALUE erb_content_node_tag_closing = rb_token_from_c_struct(erb_content_node->tag_closing);
|
418
|
-
/* #<Herb::Template::AnalyzedRubyField:
|
418
|
+
/* #<Herb::Template::AnalyzedRubyField:0x00007ffffed7e348 @name="analyzed_ruby", @options={kind: nil}> */
|
419
419
|
VALUE erb_content_node_analyzed_ruby = Qnil;
|
420
420
|
VALUE erb_content_node_parsed = (erb_content_node->parsed) ? Qtrue : Qfalse;
|
421
421
|
VALUE erb_content_node_valid = (erb_content_node->valid) ? Qtrue : Qfalse;
|
data/ext/herb/nodes.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/nodes.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_EXTENSION_NODES_H
|
5
5
|
#define HERB_EXTENSION_NODES_H
|
data/lib/herb/3.0/herb.so
CHANGED
Binary file
|
data/lib/herb/3.1/herb.so
CHANGED
Binary file
|
data/lib/herb/3.2/herb.so
CHANGED
Binary file
|
data/lib/herb/3.3/herb.so
CHANGED
Binary file
|
data/lib/herb/3.4/herb.so
CHANGED
Binary file
|
data/lib/herb/ast/nodes.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/ast/nodes.rb.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
module AST
|
data/lib/herb/cli.rb
CHANGED
@@ -110,8 +110,8 @@ class Herb::CLI
|
|
110
110
|
project.no_interactive = no_interactive
|
111
111
|
project.no_log_file = no_log_file
|
112
112
|
project.no_timing = no_timing
|
113
|
-
project.parse!
|
114
|
-
exit(0)
|
113
|
+
has_issues = project.parse!
|
114
|
+
exit(has_issues ? 1 : 0)
|
115
115
|
when "parse"
|
116
116
|
Herb.parse(file_content)
|
117
117
|
when "lex"
|
data/lib/herb/errors.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/errors.rb.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
module Errors
|
data/lib/herb/project.rb
CHANGED
data/lib/herb/version.rb
CHANGED
data/lib/herb/visitor.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/visitor.rb.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
class Visitor
|
data/sig/serialized_ast_errors.rbs
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/sig/serialized_ast_errors.rbs.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
type serialized_unexpected_error = serialized_error & {
|
data/sig/serialized_ast_nodes.rbs
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# typed: true
|
3
3
|
|
4
4
|
# NOTE: This file is generated by the templates/template.rb script and should not be
|
5
|
-
# modified manually. See /Users/marcoroth/Development/herb-release-
|
5
|
+
# modified manually. See /Users/marcoroth/Development/herb-release-7/templates/sig/serialized_ast_nodes.rbs.erb
|
6
6
|
|
7
7
|
module Herb
|
8
8
|
type serialized_document_node = serialized_node & {
|
data/src/ast_nodes.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_nodes.c.erb
|
3
3
|
|
4
4
|
#include <stdio.h>
|
5
5
|
#include <stdbool.h>
|
data/src/ast_pretty_print.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_pretty_print.c.erb
|
3
3
|
|
4
4
|
#include "include/ast_node.h"
|
5
5
|
#include "include/ast_nodes.h"
|
data/src/errors.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/errors.c.erb
|
3
3
|
|
4
4
|
#include "include/array.h"
|
5
5
|
#include "include/errors.h"
|
data/src/include/ast_nodes.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_nodes.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_AST_NODES_H
|
5
5
|
#define HERB_AST_NODES_H
|
data/src/include/ast_pretty_print.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_pretty_print.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_AST_PRETTY_PRINT_H
|
5
5
|
#define HERB_AST_PRETTY_PRINT_H
|
data/src/include/errors.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/errors.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_ERRORS_H
|
5
5
|
#define HERB_ERRORS_H
|
data/src/include/utf8.h
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef HERB_UTF8_H
|
2
|
+
#define HERB_UTF8_H
|
3
|
+
|
4
|
+
#include <stdbool.h>
|
5
|
+
#include <stdlib.h>
|
6
|
+
|
7
|
+
int utf8_char_byte_length(unsigned char first_byte);
|
8
|
+
int utf8_sequence_length(const char* str, size_t position, size_t max_length);
|
9
|
+
bool utf8_is_valid_continuation_byte(unsigned char byte);
|
10
|
+
|
11
|
+
#endif
|
data/src/include/version.h
CHANGED
data/src/lexer.c
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "include/buffer.h"
|
2
2
|
#include "include/lexer_peek_helpers.h"
|
3
3
|
#include "include/token.h"
|
4
|
+
#include "include/utf8.h"
|
4
5
|
#include "include/util.h"
|
5
6
|
|
6
7
|
#include <ctype.h>
|
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
|
|
84
85
|
}
|
85
86
|
}
|
86
87
|
|
88
|
+
// Moves the lexer forward by `byte_count` bytes in one step, for consuming a
// multi-byte UTF-8 character as a single unit.
//
// - Does nothing when `byte_count` is not positive, or when the lexer reports
//   no more characters / end of file.
// - The column counter is bumped by exactly one per call (not once per byte),
//   and only when the character being left is not a newline.
// - The position is clamped to `source_length`; once clamped, the current
//   character becomes '\0'.
static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
  if (byte_count <= 0) { return; }
  if (!lexer_has_more_characters(lexer) || lexer_eof(lexer)) { return; }

  if (!is_newline(lexer->current_character)) { lexer->current_column++; }

  lexer->current_position += byte_count;

  // Still inside the source: load the byte we landed on and we are done.
  if (lexer->current_position < lexer->source_length) {
    lexer->current_character = lexer->source[lexer->current_position];
    return;
  }

  // Ran to (or past) the end: pin the position to the end of the source.
  lexer->current_position = lexer->source_length;
  lexer->current_character = '\0';
}
|
104
|
+
|
87
105
|
static void lexer_advance_by(lexer_T* lexer, const size_t count) {
|
88
106
|
for (size_t i = 0; i < count; i++) {
|
89
107
|
lexer_advance(lexer);
|
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
|
|
116
134
|
return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
|
117
135
|
}
|
118
136
|
|
137
|
+
// Consumes the (possibly multi-byte) UTF-8 character at the lexer's current
// position and returns it as one token of the given `type`.
//
// Falls back to lexer_advance_current() -- i.e. consumes a single byte -- when
// utf8_sequence_length() reports a length of 1 or less, when the reported
// length would not fit the local buffer, or when the sequence would run past
// the end of the source.
//
// The character bytes are staged in a fixed stack buffer: a UTF-8 sequence is
// at most 4 bytes long, so no heap allocation (and no allocation-failure path)
// is needed.
static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
  enum { UTF8_MAX_BYTES = 4 };

  const int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);

  if (char_byte_length <= 1 || char_byte_length > UTF8_MAX_BYTES) { return lexer_advance_current(lexer, type); }

  // One up-front bounds check instead of one per copied byte.
  if (lexer->current_position + char_byte_length > lexer->source_length) {
    return lexer_advance_current(lexer, type);
  }

  char utf8_char[UTF8_MAX_BYTES + 1];

  for (int i = 0; i < char_byte_length; i++) {
    utf8_char[i] = lexer->source[lexer->current_position + i];
  }

  utf8_char[char_byte_length] = '\0';

  // Advance first so the token is created with the lexer already positioned
  // after the character, same ordering as before.
  lexer_advance_utf8_bytes(lexer, char_byte_length);

  // NOTE(review): relies on token_init() copying `value` rather than keeping
  // the pointer, since `utf8_char` does not outlive this call -- confirm.
  return token_init(utf8_char, type, lexer);
}
|
165
|
+
|
119
166
|
static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
|
120
167
|
if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
|
121
168
|
return lexer_advance_with(lexer, value, type);
|
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
232
279
|
if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
|
233
280
|
|
234
281
|
if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
|
235
|
-
return
|
282
|
+
return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
|
236
283
|
}
|
237
284
|
|
238
285
|
switch (lexer->current_character) {
|
@@ -282,7 +329,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
282
329
|
default: {
|
283
330
|
if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
|
284
331
|
|
285
|
-
return
|
332
|
+
return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
|
286
333
|
}
|
287
334
|
}
|
288
335
|
}
|
data/src/parser.c
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
#include "include/parser_helpers.h"
|
10
10
|
#include "include/token.h"
|
11
11
|
#include "include/token_matchers.h"
|
12
|
+
#include "include/util.h"
|
12
13
|
|
13
14
|
#include <stdio.h>
|
14
15
|
#include <stdlib.h>
|
@@ -184,14 +185,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
|
|
184
185
|
|
185
186
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
186
187
|
array_T* errors = array_init(8);
|
187
|
-
token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
|
188
188
|
|
189
|
-
|
189
|
+
token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
|
190
|
+
token_T* first_token = NULL;
|
191
|
+
|
192
|
+
if (at_token != NULL) {
|
193
|
+
first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
|
194
|
+
|
195
|
+
if (first_token == NULL) {
|
196
|
+
parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
|
197
|
+
|
198
|
+
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
199
|
+
ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
|
200
|
+
|
201
|
+
token_free(at_token);
|
202
|
+
|
203
|
+
return attribute_name;
|
204
|
+
}
|
205
|
+
} else {
|
206
|
+
first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
|
207
|
+
|
208
|
+
if (first_token == NULL) {
|
209
|
+
parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
|
210
|
+
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
|
211
|
+
return attribute_name;
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
buffer_T name_buffer = buffer_new();
|
216
|
+
|
217
|
+
position_T* start_position;
|
218
|
+
|
219
|
+
if (at_token != NULL) {
|
220
|
+
buffer_append(&name_buffer, at_token->value);
|
221
|
+
start_position = position_copy(at_token->location->start);
|
222
|
+
} else {
|
223
|
+
start_position = position_copy(first_token->location->start);
|
224
|
+
}
|
225
|
+
|
226
|
+
buffer_append(&name_buffer, first_token->value);
|
227
|
+
|
228
|
+
position_T* end_position = position_copy(first_token->location->end);
|
229
|
+
size_t range_end = first_token->range->to;
|
230
|
+
|
231
|
+
while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
|
232
|
+
&& strcmp(parser->current_token->value, ".") == 0) {
|
233
|
+
|
234
|
+
token_T* dot_token = parser_advance(parser);
|
235
|
+
|
236
|
+
buffer_append(&name_buffer, dot_token->value);
|
237
|
+
position_free(end_position);
|
238
|
+
|
239
|
+
end_position = position_copy(dot_token->location->end);
|
240
|
+
range_end = dot_token->range->to;
|
241
|
+
|
242
|
+
token_free(dot_token);
|
243
|
+
|
244
|
+
if (parser->current_token->type == TOKEN_IDENTIFIER) {
|
245
|
+
token_T* next_identifier = parser_advance(parser);
|
246
|
+
|
247
|
+
buffer_append(&name_buffer, next_identifier->value);
|
248
|
+
position_free(end_position);
|
249
|
+
|
250
|
+
end_position = position_copy(next_identifier->location->end);
|
251
|
+
range_end = next_identifier->range->to;
|
252
|
+
token_free(next_identifier);
|
253
|
+
} else {
|
254
|
+
break;
|
255
|
+
}
|
256
|
+
}
|
257
|
+
|
258
|
+
token_T* combined_token = calloc(1, sizeof(token_T));
|
259
|
+
combined_token->value = herb_strdup(name_buffer.value);
|
260
|
+
combined_token->type = TOKEN_IDENTIFIER;
|
261
|
+
combined_token->location =
|
262
|
+
location_from(start_position->line, start_position->column, end_position->line, end_position->column);
|
263
|
+
|
264
|
+
size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
|
265
|
+
combined_token->range = range_init(range_start, range_end);
|
190
266
|
|
191
267
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
192
|
-
ast_html_attribute_name_node_init(
|
268
|
+
ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
|
193
269
|
|
194
|
-
|
270
|
+
buffer_free(&name_buffer);
|
271
|
+
position_free(start_position);
|
272
|
+
position_free(end_position);
|
273
|
+
token_free(first_token);
|
274
|
+
|
275
|
+
if (at_token != NULL) { token_free(at_token); }
|
276
|
+
|
277
|
+
token_free(combined_token);
|
195
278
|
|
196
279
|
return attribute_name;
|
197
280
|
}
|
@@ -390,10 +473,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
390
473
|
continue;
|
391
474
|
}
|
392
475
|
|
476
|
+
if (parser->current_token->type == TOKEN_AT) {
|
477
|
+
array_append(children, parser_parse_html_attribute(parser));
|
478
|
+
continue;
|
479
|
+
}
|
480
|
+
|
393
481
|
parser_append_unexpected_error(
|
394
482
|
parser,
|
395
483
|
"Unexpected Token",
|
396
|
-
"TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
|
484
|
+
"TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
|
397
485
|
errors
|
398
486
|
);
|
399
487
|
}
|
@@ -441,6 +529,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
441
529
|
|
442
530
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
443
531
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
532
|
+
|
533
|
+
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
534
|
+
token_T* whitespace = parser_advance(parser);
|
535
|
+
token_free(whitespace);
|
536
|
+
}
|
537
|
+
|
444
538
|
token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
|
445
539
|
|
446
540
|
if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
|
@@ -618,12 +712,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
618
712
|
if (token_is_any_of(
|
619
713
|
parser,
|
620
714
|
TOKEN_AMPERSAND,
|
715
|
+
TOKEN_AT,
|
621
716
|
TOKEN_CHARACTER,
|
622
717
|
TOKEN_COLON,
|
623
718
|
TOKEN_DASH,
|
624
719
|
TOKEN_EQUALS,
|
625
720
|
TOKEN_EXCLAMATION,
|
626
721
|
TOKEN_IDENTIFIER,
|
722
|
+
TOKEN_NBSP,
|
627
723
|
TOKEN_NEWLINE,
|
628
724
|
TOKEN_PERCENT,
|
629
725
|
TOKEN_QUOTE,
|
@@ -639,8 +735,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
639
735
|
parser_append_unexpected_error(
|
640
736
|
parser,
|
641
737
|
"Unexpected token",
|
642
|
-
"TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE,
|
643
|
-
"TOKEN_NEWLINE",
|
738
|
+
"TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
|
739
|
+
"TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
|
644
740
|
errors
|
645
741
|
);
|
646
742
|
}
|
data/src/utf8.c
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#include "include/utf8.h"
|
2
|
+
|
3
|
+
// UTF-8 lead-byte patterns (RFC 3629):
//   0xxxxxxx  (0x00..0x7F) = 1 byte (ASCII)
//   110xxxxx  (0xC2..0xDF) = 2 bytes
//   1110xxxx  (0xE0..0xEF) = 3 bytes
//   11110xxx  (0xF0..0xF4) = 4 bytes
//
// 0xC0 and 0xC1 match the 2-byte bit pattern but can only produce overlong
// encodings, and 0xF5..0xF7 match the 4-byte pattern but encode values above
// U+10FFFF, so none of them can start a valid sequence.
//
// Returns the sequence length implied by `first_byte`. Any byte that cannot
// start a sequence (continuation bytes and the invalid lead bytes above)
// yields 1 so the caller consumes it as a single byte.
int utf8_char_byte_length(unsigned char first_byte) {
  if (first_byte < 0x80) { return 1; }
  if (first_byte >= 0xC2 && first_byte <= 0xDF) { return 2; }
  if (first_byte >= 0xE0 && first_byte <= 0xEF) { return 3; }
  if (first_byte >= 0xF0 && first_byte <= 0xF4) { return 4; }

  return 1;
}

// Continuation bytes have the bit pattern 10xxxxxx (0x80..0xBF).
bool utf8_is_valid_continuation_byte(unsigned char byte) {
  return (byte & 0xC0) == 0x80;
}

// Returns the number of bytes occupied by the UTF-8 sequence that starts at
// `str[position]`, where `max_length` is the total length of `str` in bytes.
//
//   0  -> `str` is NULL or `position` is at/after `max_length`
//   1  -> ASCII, an invalid lead byte, a truncated sequence, or a sequence
//         with a bad continuation byte (the first byte is then meant to be
//         consumed on its own)
//   2..4 -> a complete sequence with a valid lead byte and continuation bytes
//
// Only the lead-byte range and the continuation-byte bit pattern are checked;
// overlong 3-/4-byte forms and encoded surrogates are not rejected here.
int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
  if (str == NULL || position >= max_length) { return 0; }

  const unsigned char* bytes = (const unsigned char*) str + position;
  const size_t remaining = max_length - position;
  const size_t expected_length = (size_t) utf8_char_byte_length(bytes[0]);

  // Not enough bytes left for the whole sequence: treat as a single byte.
  if (expected_length > remaining) { return 1; }

  for (size_t i = 1; i < expected_length; i++) {
    // Invalid continuation byte: treat the first byte as a single byte.
    if (!utf8_is_valid_continuation_byte(bytes[i])) { return 1; }
  }

  return (int) expected_length;
}
|
data/src/visitor.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/visitor.c.erb
|
3
3
|
|
4
4
|
#include <stdio.h>
|
5
5
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: herb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.3
|
5
5
|
platform: x86-linux-gnu
|
6
6
|
authors:
|
7
7
|
- Marco Roth
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-08-03 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: Powerful and seamless HTML-aware ERB parsing and tooling.
|
13
13
|
email:
|
@@ -125,6 +125,7 @@ files:
|
|
125
125
|
- src/include/token.h
|
126
126
|
- src/include/token_matchers.h
|
127
127
|
- src/include/token_struct.h
|
128
|
+
- src/include/utf8.h
|
128
129
|
- src/include/util.h
|
129
130
|
- src/include/version.h
|
130
131
|
- src/include/visitor.h
|
@@ -144,6 +145,7 @@ files:
|
|
144
145
|
- src/ruby_parser.c
|
145
146
|
- src/token.c
|
146
147
|
- src/token_matchers.c
|
148
|
+
- src/utf8.c
|
147
149
|
- src/util.c
|
148
150
|
- src/visitor.c
|
149
151
|
homepage: https://herb-tools.dev
|