herb 0.4.2-arm-linux-gnu → 0.5.0-arm-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f27aaaad81ad225f5316278ca8a3c2412d624597d2a28e7eb5cafce3db32316e
4
- data.tar.gz: d6c58ecbfed32f6c6afda64e1956d24f8b354bff4647ec6b01c93846742e51c5
3
+ metadata.gz: 58698840c9bcb8e894eb66edacdd95511702fd1afb0f63995aca04460dae9144
4
+ data.tar.gz: d932daf7cbc0e0e14f885fbf7a70b097d48361d0108ba7cd85a70948d3471c2f
5
5
  SHA512:
6
- metadata.gz: 871f6d51b0eb6be8d052b4ede53807e598a4e65a2b1d94139b2e834f731d11b136624dbe1e9351cc357e100afb1b98f4fe447b114b45328a51d6858747b6d2d2
7
- data.tar.gz: 8ad246404e5f6e1e3c6926bfba00a7ea99860d82e8824b0290c29123edc71a11ab0a52801f196819b4e714754ec0a985d28f6f1151ae47987a7d19a0380b1b06
6
+ metadata.gz: a49a007afc27785a6d96c959b1467303d0c81130cdc6e10e8713baa8f240d36765dc655c22a75e446291f0d4d8addead9ffce2fd09af7589fc64f19c50e5ea5b
7
+ data.tar.gz: e0ea0d65b094d779ce93237fcf19098a7da5149b141fc1743022cde10f94a6b75aea0e2896d15704be9a103c26ba2412b27026b855070d5179df838e7add9523
data/README.md CHANGED
@@ -37,7 +37,7 @@ Herb provides a complete ecosystem of HTML+ERB tooling, designed to simplify and
37
37
  Automatic, consistent formatting for HTML+ERB files, reducing manual styling and enforcing a standard across projects. Currently in experimental preview - use with caution on version-controlled files.
38
38
 
39
39
  - **Herb Linter** ([available now](https://herb-tools.dev/projects/linter)):
40
- Static analysis for your HTML+ERB templates to enforce best practices and quickly identify common mistakes with 17 configurable rules.
40
+ Static analysis for your HTML+ERB templates to enforce best practices and quickly identify common mistakes with plenty of rules.
41
41
 
42
42
  You can use Herb programmatically in **Ruby**, as well as in **JavaScript** via Node.js, WebAssembly, or directly in browsers.
43
43
 
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/error_helpers.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/error_helpers.c.erb
3
3
 
4
4
  #include <ruby.h>
5
5
 
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/error_helpers.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/error_helpers.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_ERROR_HELPERS_H
5
5
  #define HERB_EXTENSION_ERROR_HELPERS_H
data/ext/herb/nodes.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/nodes.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/nodes.c.erb
3
3
 
4
4
  #include <ruby.h>
5
5
 
@@ -415,7 +415,7 @@ static VALUE rb_erb_content_node_from_c_struct(AST_ERB_CONTENT_NODE_T* erb_conte
415
415
  VALUE erb_content_node_tag_opening = rb_token_from_c_struct(erb_content_node->tag_opening);
416
416
  VALUE erb_content_node_content = rb_token_from_c_struct(erb_content_node->content);
417
417
  VALUE erb_content_node_tag_closing = rb_token_from_c_struct(erb_content_node->tag_closing);
418
- /* #<Herb::Template::AnalyzedRubyField:0x00007fffe335ba20 @name="analyzed_ruby", @options={kind: nil}> */
418
+ /* #<Herb::Template::AnalyzedRubyField:0x00007fffe3389178 @name="analyzed_ruby", @options={kind: nil}> */
419
419
  VALUE erb_content_node_analyzed_ruby = Qnil;
420
420
  VALUE erb_content_node_parsed = (erb_content_node->parsed) ? Qtrue : Qfalse;
421
421
  VALUE erb_content_node_valid = (erb_content_node->valid) ? Qtrue : Qfalse;
data/ext/herb/nodes.h CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/nodes.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_NODES_H
5
5
  #define HERB_EXTENSION_NODES_H
data/lib/herb/3.0/herb.so CHANGED
Binary file
data/lib/herb/3.1/herb.so CHANGED
Binary file
data/lib/herb/3.2/herb.so CHANGED
Binary file
data/lib/herb/3.3/herb.so CHANGED
Binary file
data/lib/herb/3.4/herb.so CHANGED
Binary file
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-6/templates/lib/herb/ast/nodes.rb.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/ast/nodes.rb.erb
6
6
 
7
7
  module Herb
8
8
  module AST
data/lib/herb/cli.rb CHANGED
@@ -110,8 +110,8 @@ class Herb::CLI
110
110
  project.no_interactive = no_interactive
111
111
  project.no_log_file = no_log_file
112
112
  project.no_timing = no_timing
113
- project.parse!
114
- exit(0)
113
+ has_issues = project.parse!
114
+ exit(has_issues ? 1 : 0)
115
115
  when "parse"
116
116
  Herb.parse(file_content)
117
117
  when "lex"
data/lib/herb/errors.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-6/templates/lib/herb/errors.rb.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/errors.rb.erb
6
6
 
7
7
  module Herb
8
8
  module Errors
data/lib/herb/project.rb CHANGED
@@ -366,6 +366,8 @@ module Herb
366
366
  end
367
367
 
368
368
  puts "\nResults saved to #{output_file}" unless no_log_file
369
+
370
+ problem_files.any?
369
371
  ensure
370
372
  log.close unless no_log_file
371
373
  end
data/lib/herb/version.rb CHANGED
@@ -2,5 +2,5 @@
2
2
  # typed: true
3
3
 
4
4
  module Herb
5
- VERSION = "0.4.2"
5
+ VERSION = "0.5.0"
6
6
  end
data/lib/herb/visitor.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-6/templates/lib/herb/visitor.rb.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/visitor.rb.erb
6
6
 
7
7
  module Herb
8
8
  class Visitor
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-6/templates/sig/serialized_ast_errors.rbs.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/sig/serialized_ast_errors.rbs.erb
6
6
 
7
7
  module Herb
8
8
  type serialized_unexpected_error = serialized_error & {
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-6/templates/sig/serialized_ast_nodes.rbs.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/sig/serialized_ast_nodes.rbs.erb
6
6
 
7
7
  module Herb
8
8
  type serialized_document_node = serialized_node & {
data/src/analyze.c CHANGED
@@ -50,7 +50,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) {
50
50
  AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;
51
51
 
52
52
  const char* opening = erb_content_node->tag_opening->value;
53
- if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0) {
53
+
54
+ if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
54
55
  analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);
55
56
 
56
57
  if (false) { pretty_print_analyed_ruby(analyzed, erb_content_node->content->value); }
data/src/ast_nodes.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_nodes.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_nodes.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
  #include <stdbool.h>
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_pretty_print.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_pretty_print.c.erb
3
3
 
4
4
  #include "include/ast_node.h"
5
5
  #include "include/ast_nodes.h"
data/src/errors.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/errors.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/errors.c.erb
3
3
 
4
4
  #include "include/array.h"
5
5
  #include "include/errors.h"
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_nodes.h.erb
3
3
 
4
4
  #ifndef HERB_AST_NODES_H
5
5
  #define HERB_AST_NODES_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
3
3
 
4
4
  #ifndef HERB_AST_PRETTY_PRINT_H
5
5
  #define HERB_AST_PRETTY_PRINT_H
data/src/include/errors.h CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
3
3
 
4
4
  #ifndef HERB_ERRORS_H
5
5
  #define HERB_ERRORS_H
data/src/include/parser.h CHANGED
@@ -5,10 +5,22 @@
5
5
  #include "ast_node.h"
6
6
  #include "lexer.h"
7
7
 
8
+ typedef enum {
9
+ FOREIGN_CONTENT_UNKNOWN = 0,
10
+ FOREIGN_CONTENT_SCRIPT,
11
+ FOREIGN_CONTENT_STYLE,
12
+ // FOREIGN_CONTENT_RUBY,
13
+ // FOREIGN_CONTENT_TEMPLATE
14
+ } foreign_content_type_T;
15
+
16
+ typedef enum { PARSER_STATE_DATA, PARSER_STATE_FOREIGN_CONTENT } parser_state_T;
17
+
8
18
  typedef struct PARSER_STRUCT {
9
19
  lexer_T* lexer;
10
20
  token_T* current_token;
11
21
  array_T* open_tags_stack;
22
+ parser_state_T state;
23
+ foreign_content_type_T foreign_content_type;
12
24
  } parser_T;
13
25
 
14
26
  parser_T* parser_init(lexer_T* lexer);
@@ -24,6 +24,15 @@ void parser_append_literal_node_from_buffer(
24
24
 
25
25
  bool parser_in_svg_context(const parser_T* parser);
26
26
 
27
+ foreign_content_type_T parser_get_foreign_content_type(const char* tag_name);
28
+ bool parser_is_foreign_content_tag(const char* tag_name);
29
+ const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type);
30
+
31
+ void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type);
32
+ void parser_exit_foreign_content(parser_T* parser);
33
+
34
+ bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type);
35
+
27
36
  token_T* parser_advance(parser_T* parser);
28
37
  token_T* parser_consume_if_present(parser_T* parser, token_type_T type);
29
38
  token_T* parser_consume_expected(parser_T* parser, token_type_T type, array_T* array);
@@ -28,6 +28,7 @@ typedef enum {
28
28
  TOKEN_SLASH, // /
29
29
  TOKEN_EQUALS, // =
30
30
  TOKEN_QUOTE, // ", '
31
+ TOKEN_BACKTICK, // `
31
32
  TOKEN_DASH, // -
32
33
  TOKEN_UNDERSCORE, // _
33
34
  TOKEN_EXCLAMATION, // !
@@ -0,0 +1,11 @@
1
+ #ifndef HERB_UTF8_H
2
+ #define HERB_UTF8_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stdlib.h>
6
+
7
+ int utf8_char_byte_length(unsigned char first_byte);
8
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length);
9
+ bool utf8_is_valid_continuation_byte(unsigned char byte);
10
+
11
+ #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.4.2"
4
+ #define HERB_VERSION "0.5.0"
5
5
 
6
6
  #endif
data/src/lexer.c CHANGED
@@ -1,6 +1,7 @@
1
1
  #include "include/buffer.h"
2
2
  #include "include/lexer_peek_helpers.h"
3
3
  #include "include/token.h"
4
+ #include "include/utf8.h"
4
5
  #include "include/util.h"
5
6
 
6
7
  #include <ctype.h>
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
84
85
  }
85
86
  }
86
87
 
88
+ static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
89
+ if (byte_count <= 0) { return; }
90
+
91
+ if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
92
+ if (!is_newline(lexer->current_character)) { lexer->current_column++; }
93
+
94
+ lexer->current_position += byte_count;
95
+
96
+ if (lexer->current_position >= lexer->source_length) {
97
+ lexer->current_position = lexer->source_length;
98
+ lexer->current_character = '\0';
99
+ } else {
100
+ lexer->current_character = lexer->source[lexer->current_position];
101
+ }
102
+ }
103
+ }
104
+
87
105
  static void lexer_advance_by(lexer_T* lexer, const size_t count) {
88
106
  for (size_t i = 0; i < count; i++) {
89
107
  lexer_advance(lexer);
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
116
134
  return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
117
135
  }
118
136
 
137
+ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
138
+ int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
139
+
140
+ if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
141
+
142
+ char* utf8_char = malloc(char_byte_length + 1);
143
+
144
+ if (!utf8_char) { return lexer_advance_current(lexer, type); }
145
+
146
+ for (int i = 0; i < char_byte_length; i++) {
147
+ if (lexer->current_position + i >= lexer->source_length) {
148
+ free(utf8_char);
149
+ return lexer_advance_current(lexer, type);
150
+ }
151
+
152
+ utf8_char[i] = lexer->source[lexer->current_position + i];
153
+ }
154
+
155
+ utf8_char[char_byte_length] = '\0';
156
+
157
+ lexer_advance_utf8_bytes(lexer, char_byte_length);
158
+
159
+ token_T* token = token_init(utf8_char, type, lexer);
160
+
161
+ free(utf8_char);
162
+
163
+ return token;
164
+ }
165
+
119
166
  static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
120
167
  if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
121
168
  return lexer_advance_with(lexer, value, type);
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
232
279
  if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
233
280
 
234
281
  if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
235
- return lexer_advance_with(lexer, "\xC2\xA0", TOKEN_NBSP);
282
+ return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
236
283
  }
237
284
 
238
285
  switch (lexer->current_character) {
@@ -278,11 +325,12 @@ token_T* lexer_next_token(lexer_T* lexer) {
278
325
 
279
326
  case '"':
280
327
  case '\'': return lexer_advance_current(lexer, TOKEN_QUOTE);
328
+ case '`': return lexer_advance_current(lexer, TOKEN_BACKTICK);
281
329
 
282
330
  default: {
283
331
  if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
284
332
 
285
- return lexer_advance_current(lexer, TOKEN_CHARACTER);
333
+ return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
286
334
  }
287
335
  }
288
336
  }
data/src/parser.c CHANGED
@@ -9,6 +9,7 @@
9
9
  #include "include/parser_helpers.h"
10
10
  #include "include/token.h"
11
11
  #include "include/token_matchers.h"
12
+ #include "include/util.h"
12
13
 
13
14
  #include <stdio.h>
14
15
  #include <stdlib.h>
@@ -16,6 +17,7 @@
16
17
  #include <strings.h>
17
18
 
18
19
  static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
20
+ static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors);
19
21
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
20
22
 
21
23
  size_t parser_sizeof(void) {
@@ -28,6 +30,8 @@ parser_T* parser_init(lexer_T* lexer) {
28
30
  parser->lexer = lexer;
29
31
  parser->current_token = lexer_next_token(lexer);
30
32
  parser->open_tags_stack = array_init(16);
33
+ parser->state = PARSER_STATE_DATA;
34
+ parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
31
35
 
32
36
  return parser;
33
37
  }
@@ -184,14 +188,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
184
188
 
185
189
  static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
186
190
  array_T* errors = array_init(8);
187
- token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
188
191
 
189
- if (identifier == NULL) { parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors); }
192
+ token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
193
+ token_T* first_token = NULL;
194
+
195
+ if (at_token != NULL) {
196
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
197
+
198
+ if (first_token == NULL) {
199
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
200
+
201
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
202
+ ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
203
+
204
+ token_free(at_token);
205
+
206
+ return attribute_name;
207
+ }
208
+ } else {
209
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
210
+
211
+ if (first_token == NULL) {
212
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
213
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
214
+ return attribute_name;
215
+ }
216
+ }
217
+
218
+ buffer_T name_buffer = buffer_new();
219
+
220
+ position_T* start_position;
221
+
222
+ if (at_token != NULL) {
223
+ buffer_append(&name_buffer, at_token->value);
224
+ start_position = position_copy(at_token->location->start);
225
+ } else {
226
+ start_position = position_copy(first_token->location->start);
227
+ }
228
+
229
+ buffer_append(&name_buffer, first_token->value);
230
+
231
+ position_T* end_position = position_copy(first_token->location->end);
232
+ size_t range_end = first_token->range->to;
233
+
234
+ while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
235
+ && strcmp(parser->current_token->value, ".") == 0) {
236
+
237
+ token_T* dot_token = parser_advance(parser);
238
+
239
+ buffer_append(&name_buffer, dot_token->value);
240
+ position_free(end_position);
241
+
242
+ end_position = position_copy(dot_token->location->end);
243
+ range_end = dot_token->range->to;
244
+
245
+ token_free(dot_token);
246
+
247
+ if (parser->current_token->type == TOKEN_IDENTIFIER) {
248
+ token_T* next_identifier = parser_advance(parser);
249
+
250
+ buffer_append(&name_buffer, next_identifier->value);
251
+ position_free(end_position);
252
+
253
+ end_position = position_copy(next_identifier->location->end);
254
+ range_end = next_identifier->range->to;
255
+ token_free(next_identifier);
256
+ } else {
257
+ break;
258
+ }
259
+ }
260
+
261
+ token_T* combined_token = calloc(1, sizeof(token_T));
262
+ combined_token->value = herb_strdup(name_buffer.value);
263
+ combined_token->type = TOKEN_IDENTIFIER;
264
+ combined_token->location =
265
+ location_from(start_position->line, start_position->column, end_position->line, end_position->column);
266
+
267
+ size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
268
+ combined_token->range = range_init(range_start, range_end);
190
269
 
191
270
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
192
- ast_html_attribute_name_node_init(identifier, identifier->location->start, identifier->location->end, errors);
271
+ ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
272
+
273
+ buffer_free(&name_buffer);
274
+ position_free(start_position);
275
+ position_free(end_position);
276
+ token_free(first_token);
193
277
 
194
- token_free(identifier);
278
+ if (at_token != NULL) { token_free(at_token); }
279
+
280
+ token_free(combined_token);
195
281
 
196
282
  return attribute_name;
197
283
  }
@@ -300,6 +386,30 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
300
386
  // <div id="home">
301
387
  if (token_is(parser, TOKEN_QUOTE)) { return parser_parse_quoted_html_attribute_value(parser, children, errors); }
302
388
 
389
+ if (token_is(parser, TOKEN_BACKTICK)) {
390
+ token_T* token = parser_advance(parser);
391
+ position_T* start = position_copy(token->location->start);
392
+ position_T* end = position_copy(token->location->end);
393
+
394
+ append_unexpected_error(
395
+ "Invalid quote character for HTML attribute",
396
+ "single quote (') or double quote (\")",
397
+ "backtick (`)",
398
+ start,
399
+ end,
400
+ errors
401
+ );
402
+
403
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
404
+ ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
405
+
406
+ position_free(start);
407
+ position_free(end);
408
+ token_free(token);
409
+
410
+ return value;
411
+ }
412
+
303
413
  token_T* token = parser_advance(parser);
304
414
 
305
415
  append_unexpected_error(
@@ -329,9 +439,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
329
439
  static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
330
440
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
331
441
 
442
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
443
+ token_T* whitespace = parser_advance(parser);
444
+ token_free(whitespace);
445
+ }
446
+
332
447
  token_T* equals = parser_consume_if_present(parser, TOKEN_EQUALS);
333
448
 
334
449
  if (equals != NULL) {
450
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
451
+ token_T* whitespace = parser_advance(parser);
452
+ token_free(whitespace);
453
+ }
454
+
335
455
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
336
456
 
337
457
  AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
@@ -390,10 +510,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
390
510
  continue;
391
511
  }
392
512
 
513
+ if (parser->current_token->type == TOKEN_AT) {
514
+ array_append(children, parser_parse_html_attribute(parser));
515
+ continue;
516
+ }
517
+
393
518
  parser_append_unexpected_error(
394
519
  parser,
395
520
  "Unexpected Token",
396
- "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
521
+ "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
397
522
  errors
398
523
  );
399
524
  }
@@ -441,6 +566,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
441
566
 
442
567
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
443
568
  token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
569
+
570
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
571
+ token_T* whitespace = parser_advance(parser);
572
+ token_free(whitespace);
573
+ }
574
+
444
575
  token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
445
576
 
446
577
  if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
@@ -502,7 +633,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
502
633
 
503
634
  parser_push_open_tag(parser, open_tag->tag_name);
504
635
 
505
- parser_parse_in_data_state(parser, body, errors);
636
+ if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
637
+ foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
638
+ parser_enter_foreign_content(parser, content_type);
639
+ parser_parse_foreign_content(parser, body, errors);
640
+ } else {
641
+ parser_parse_in_data_state(parser, body, errors);
642
+ }
506
643
 
507
644
  if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
508
645
 
@@ -593,6 +730,83 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
593
730
  return erb_node;
594
731
  }
595
732
 
733
+ static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors) {
734
+ buffer_T content = buffer_new();
735
+ position_T* start = position_copy(parser->current_token->location->start);
736
+ const char* expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
737
+
738
+ if (expected_closing_tag == NULL) {
739
+ parser_exit_foreign_content(parser);
740
+ position_free(start);
741
+ buffer_free(&content);
742
+
743
+ return;
744
+ }
745
+
746
+ while (!token_is(parser, TOKEN_EOF)) {
747
+ if (token_is(parser, TOKEN_ERB_START)) {
748
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
749
+
750
+ AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
751
+ array_append(children, erb_node);
752
+
753
+ position_free(start);
754
+ start = position_copy(parser->current_token->location->start);
755
+
756
+ continue;
757
+ }
758
+
759
+ if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
760
+ size_t saved_position = parser->lexer->current_position;
761
+ size_t saved_line = parser->lexer->current_line;
762
+ size_t saved_column = parser->lexer->current_column;
763
+ size_t saved_previous_position = parser->lexer->previous_position;
764
+ size_t saved_previous_line = parser->lexer->previous_line;
765
+ size_t saved_previous_column = parser->lexer->previous_column;
766
+
767
+ char saved_char = parser->lexer->current_character;
768
+ lexer_state_T saved_state = parser->lexer->state;
769
+
770
+ token_T* next_token = lexer_next_token(parser->lexer);
771
+ bool is_potential_match = false;
772
+
773
+ if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
774
+ is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
775
+ }
776
+
777
+ parser->lexer->current_position = saved_position;
778
+ parser->lexer->current_line = saved_line;
779
+ parser->lexer->current_column = saved_column;
780
+ parser->lexer->previous_position = saved_previous_position;
781
+ parser->lexer->previous_line = saved_previous_line;
782
+ parser->lexer->previous_column = saved_previous_column;
783
+ parser->lexer->current_character = saved_char;
784
+ parser->lexer->state = saved_state;
785
+
786
+ if (next_token) { token_free(next_token); }
787
+
788
+ if (is_potential_match) {
789
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
790
+ parser_exit_foreign_content(parser);
791
+
792
+ position_free(start);
793
+ buffer_free(&content);
794
+
795
+ return;
796
+ }
797
+ }
798
+
799
+ token_T* token = parser_advance(parser);
800
+ buffer_append(&content, token->value);
801
+ token_free(token);
802
+ }
803
+
804
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
805
+ parser_exit_foreign_content(parser);
806
+ position_free(start);
807
+ buffer_free(&content);
808
+ }
809
+
596
810
  static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors) {
597
811
  while (token_is_none_of(parser, TOKEN_HTML_TAG_START_CLOSE, TOKEN_EOF)) {
598
812
  if (token_is(parser, TOKEN_ERB_START)) {
@@ -618,12 +832,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
618
832
  if (token_is_any_of(
619
833
  parser,
620
834
  TOKEN_AMPERSAND,
835
+ TOKEN_AT,
621
836
  TOKEN_CHARACTER,
622
837
  TOKEN_COLON,
623
838
  TOKEN_DASH,
624
839
  TOKEN_EQUALS,
625
840
  TOKEN_EXCLAMATION,
626
841
  TOKEN_IDENTIFIER,
842
+ TOKEN_NBSP,
627
843
  TOKEN_NEWLINE,
628
844
  TOKEN_PERCENT,
629
845
  TOKEN_QUOTE,
@@ -639,8 +855,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
639
855
  parser_append_unexpected_error(
640
856
  parser,
641
857
  "Unexpected token",
642
- "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, or "
643
- "TOKEN_NEWLINE",
858
+ "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
859
+ "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
644
860
  errors
645
861
  );
646
862
  }
data/src/parser_helpers.c CHANGED
@@ -8,6 +8,7 @@
8
8
  #include "include/lexer.h"
9
9
  #include "include/parser.h"
10
10
  #include "include/token.h"
11
+ #include "include/token_matchers.h"
11
12
 
12
13
  #include <stdio.h>
13
14
  #include <strings.h>
@@ -54,6 +55,43 @@ bool parser_in_svg_context(const parser_T* parser) {
54
55
  return false;
55
56
  }
56
57
 
58
+ // ===== Foreign Content Handling =====
59
+
60
+ foreign_content_type_T parser_get_foreign_content_type(const char* tag_name) {
61
+ if (tag_name == NULL) { return FOREIGN_CONTENT_UNKNOWN; }
62
+
63
+ if (strcasecmp(tag_name, "script") == 0) { return FOREIGN_CONTENT_SCRIPT; }
64
+ if (strcasecmp(tag_name, "style") == 0) { return FOREIGN_CONTENT_STYLE; }
65
+
66
+ return FOREIGN_CONTENT_UNKNOWN;
67
+ }
68
+
69
+ bool parser_is_foreign_content_tag(const char* tag_name) {
70
+ return parser_get_foreign_content_type(tag_name) != FOREIGN_CONTENT_UNKNOWN;
71
+ }
72
+
73
+ const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type) {
74
+ switch (type) {
75
+ case FOREIGN_CONTENT_SCRIPT: return "script";
76
+ case FOREIGN_CONTENT_STYLE: return "style";
77
+ default: return NULL;
78
+ }
79
+ }
80
+
81
+ void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type) {
82
+ if (parser == NULL) { return; }
83
+
84
+ parser->state = PARSER_STATE_FOREIGN_CONTENT;
85
+ parser->foreign_content_type = type;
86
+ }
87
+
88
+ void parser_exit_foreign_content(parser_T* parser) {
89
+ if (parser == NULL) { return; }
90
+
91
+ parser->state = PARSER_STATE_DATA;
92
+ parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
93
+ }
94
+
57
95
  void parser_append_unexpected_error(parser_T* parser, const char* description, const char* expected, array_T* errors) {
58
96
  token_T* token = parser_advance(parser);
59
97
 
@@ -166,3 +204,11 @@ void parser_handle_mismatched_tags(
166
204
  );
167
205
  }
168
206
  }
207
+
208
+ bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type) {
209
+ const char* expected_tag_name = parser_get_foreign_content_closing_tag(expected_type);
210
+
211
+ if (expected_tag_name == NULL || tag_name == NULL) { return false; }
212
+
213
+ return strcmp(tag_name, expected_tag_name) == 0;
214
+ }
data/src/token.c CHANGED
@@ -55,6 +55,7 @@ const char* token_type_to_string(const token_type_T type) {
55
55
  case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
56
56
  case TOKEN_EQUALS: return "TOKEN_EQUALS";
57
57
  case TOKEN_QUOTE: return "TOKEN_QUOTE";
58
+ case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
58
59
  case TOKEN_DASH: return "TOKEN_DASH";
59
60
  case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
60
61
  case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";
data/src/utf8.c ADDED
@@ -0,0 +1,46 @@
1
+ #include "include/utf8.h"
2
+
3
+ // UTF-8 byte patterns:
4
+ // 0xxxxxxx = 1 byte (ASCII)
5
+ // 110xxxxx = 2 bytes
6
+ // 1110xxxx = 3 bytes
7
+ // 11110xxx = 4 bytes
8
+ int utf8_char_byte_length(unsigned char first_byte) {
9
+ if ((first_byte & 0x80) == 0) {
10
+ return 1;
11
+ } else if ((first_byte & 0xE0) == 0xC0) {
12
+ return 2;
13
+ } else if ((first_byte & 0xF0) == 0xE0) {
14
+ return 3;
15
+ } else if ((first_byte & 0xF8) == 0xF0) {
16
+ return 4;
17
+ }
18
+
19
+ return 1;
20
+ }
21
+
22
+ // Continuation bytes have pattern 10xxxxxx
23
+ bool utf8_is_valid_continuation_byte(unsigned char byte) {
24
+ return (byte & 0xC0) == 0x80;
25
+ }
26
+
27
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
28
+ if (position >= max_length) { return 0; }
29
+
30
+ unsigned char first_byte = (unsigned char) str[position];
31
+ int expected_length = utf8_char_byte_length(first_byte);
32
+
33
+ if (position + expected_length > max_length) {
34
+ return 1; // Not enough bytes, treat as single byte
35
+ }
36
+
37
+ if (expected_length > 1) {
38
+ for (int i = 1; i < expected_length; i++) {
39
+ if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
40
+ return 1; // Invalid continuation byte, treat first byte as single byte
41
+ }
42
+ }
43
+ }
44
+
45
+ return expected_length;
46
+ }
data/src/visitor.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/visitor.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/visitor.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: herb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: arm-linux-gnu
6
6
  authors:
7
7
  - Marco Roth
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-07-28 00:00:00.000000000 Z
10
+ date: 2025-08-17 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: Powerful and seamless HTML-aware ERB parsing and tooling.
13
13
  email:
@@ -125,6 +125,7 @@ files:
125
125
  - src/include/token.h
126
126
  - src/include/token_matchers.h
127
127
  - src/include/token_struct.h
128
+ - src/include/utf8.h
128
129
  - src/include/util.h
129
130
  - src/include/version.h
130
131
  - src/include/visitor.h
@@ -144,6 +145,7 @@ files:
144
145
  - src/ruby_parser.c
145
146
  - src/token.c
146
147
  - src/token_matchers.c
148
+ - src/utf8.c
147
149
  - src/util.c
148
150
  - src/visitor.c
149
151
  homepage: https://herb-tools.dev