herb 0.4.1-x86_64-darwin → 0.4.3-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b360b4b7fa8caf15b801233c08a828fc864fae0ac4840a275ef5c62a7ab2b928
4
- data.tar.gz: 9ca92fbeb41e08b295e830d7a1f08b903e5078f12698981ab620c96c6a900de1
3
+ metadata.gz: 95232472925a59b33f2398ebf7ca9712fa19254e58ce26d666b945868ceeb180
4
+ data.tar.gz: 9f0d9be75fd9a6718eaa840edfa7b90b968fccc0ad9861d340bb7595a996fe0a
5
5
  SHA512:
6
- metadata.gz: 2ffedf74d9e2ce5567744fd0f199fba74338a16ec72442782ae43dedcea8228e2ace2dc8c8273e042d3e15c08c5157e41064a4ef82416d8c4d0d224e1e5370aa
7
- data.tar.gz: 0d0f842aab42190fe2923ad19c03cdae5b06dbbf0b6ec40ffab56e8fb3e6f5a2e69e191358f93545686f009378f183b3c196d9f1218200a3bd55390bbe94898e
6
+ metadata.gz: b9257722911a8f0899c4f86f1cb7a5ec9066ff62c6f95ad02f615d8fd4f7361f4f301d87e2ecf240ff13462d2e880cd4bc4bcdd00a77f85e0b1da56aefad8c96
7
+ data.tar.gz: d5ef5c5ed4ec6db578a89b4b931ef7bee66a568bf684476bb0cd73c6b7d636382258922c675c0a469731c3b1bee624d07c2bb07d8f2f38d97a3118a5a1af95af
data/README.md CHANGED
@@ -43,11 +43,11 @@ You can use Herb programmatically in **Ruby**, as well as in **JavaScript** via
43
43
 
44
44
  For a complete overview of all available tools, libraries, and integrations, visit the [**Projects page**](https://herb-tools.dev/projects) on our documentation site.
45
45
 
46
- ## Motiviation
46
+ ## Motivation
47
47
 
48
48
  HTML+ERB templates never really had good, accurate, and reliable tooling. While developer tooling for Ruby code improved significantly in the last few years (especially with the introduction of the new Prism parser), HTML+ERB files remained underserved, lacking fundamental support like syntax checking, auto-formatting, linting, and structural understanding.
49
49
 
50
- At the same time, with the rise of tools like [Hotwire](https://hotwired.dev), [Stimulus](https://stimulus.hotwired.dev), [Turbo](https://turbo.hotwired.dev), [HTMX](https://htmx.org), [Unploy](https://unpoly.com), and [Alpine.js](https://alpinejs.dev), advanced HTML templating became increasingly relevant (again). Developers expect modern, reliable, and precise tooling, especially given the robust ecosystem available to JavaScript frameworks and libraries.
50
+ At the same time, with the rise of tools like [Hotwire](https://hotwired.dev), [Stimulus](https://stimulus.hotwired.dev), [Turbo](https://turbo.hotwired.dev), [HTMX](https://htmx.org), [Unpoly](https://unpoly.com), and [Alpine.js](https://alpinejs.dev), advanced HTML templating became increasingly relevant (again). Developers expect modern, reliable, and precise tooling, especially given the robust ecosystem available to JavaScript frameworks and libraries.
51
51
 
52
52
  Herb was built to close this tooling gap, providing proper tooling for HTML+ERB that matches what modern developers expect in the age of language servers, LLMs, and AI-driven workflows.
53
53
 
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/ext/herb/error_helpers.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/error_helpers.c.erb
3
3
 
4
4
  #include <ruby.h>
5
5
 
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/ext/herb/error_helpers.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/error_helpers.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_ERROR_HELPERS_H
5
5
  #define HERB_EXTENSION_ERROR_HELPERS_H
data/ext/herb/nodes.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/ext/herb/nodes.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/nodes.c.erb
3
3
 
4
4
  #include <ruby.h>
5
5
 
@@ -415,7 +415,7 @@ static VALUE rb_erb_content_node_from_c_struct(AST_ERB_CONTENT_NODE_T* erb_conte
415
415
  VALUE erb_content_node_tag_opening = rb_token_from_c_struct(erb_content_node->tag_opening);
416
416
  VALUE erb_content_node_content = rb_token_from_c_struct(erb_content_node->content);
417
417
  VALUE erb_content_node_tag_closing = rb_token_from_c_struct(erb_content_node->tag_closing);
418
- /* #<Herb::Template::AnalyzedRubyField:0x00007ffffed7e4d8 @name="analyzed_ruby", @options={kind: nil}> */
418
+ /* #<Herb::Template::AnalyzedRubyField:0x00007fffe3358dc0 @name="analyzed_ruby", @options={kind: nil}> */
419
419
  VALUE erb_content_node_analyzed_ruby = Qnil;
420
420
  VALUE erb_content_node_parsed = (erb_content_node->parsed) ? Qtrue : Qfalse;
421
421
  VALUE erb_content_node_valid = (erb_content_node->valid) ? Qtrue : Qfalse;
data/ext/herb/nodes.h CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/ext/herb/nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/ext/herb/nodes.h.erb
3
3
 
4
4
  #ifndef HERB_EXTENSION_NODES_H
5
5
  #define HERB_EXTENSION_NODES_H
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-4/templates/lib/herb/ast/nodes.rb.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/ast/nodes.rb.erb
6
6
 
7
7
  module Herb
8
8
  module AST
data/lib/herb/cli.rb CHANGED
@@ -110,8 +110,8 @@ class Herb::CLI
110
110
  project.no_interactive = no_interactive
111
111
  project.no_log_file = no_log_file
112
112
  project.no_timing = no_timing
113
- project.parse!
114
- exit(0)
113
+ has_issues = project.parse!
114
+ exit(has_issues ? 1 : 0)
115
115
  when "parse"
116
116
  Herb.parse(file_content)
117
117
  when "lex"
data/lib/herb/errors.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-4/templates/lib/herb/errors.rb.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/errors.rb.erb
6
6
 
7
7
  module Herb
8
8
  module Errors
data/lib/herb/project.rb CHANGED
@@ -366,6 +366,8 @@ module Herb
366
366
  end
367
367
 
368
368
  puts "\nResults saved to #{output_file}" unless no_log_file
369
+
370
+ problem_files.any?
369
371
  ensure
370
372
  log.close unless no_log_file
371
373
  end
data/lib/herb/version.rb CHANGED
@@ -2,5 +2,5 @@
2
2
  # typed: true
3
3
 
4
4
  module Herb
5
- VERSION = "0.4.1"
5
+ VERSION = "0.4.3"
6
6
  end
data/lib/herb/visitor.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-4/templates/lib/herb/visitor.rb.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-7/templates/lib/herb/visitor.rb.erb
6
6
 
7
7
  module Herb
8
8
  class Visitor
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-4/templates/sig/serialized_ast_errors.rbs.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-7/templates/sig/serialized_ast_errors.rbs.erb
6
6
 
7
7
  module Herb
8
8
  type serialized_unexpected_error = serialized_error & {
@@ -2,7 +2,7 @@
2
2
  # typed: true
3
3
 
4
4
  # NOTE: This file is generated by the templates/template.rb script and should not be
5
- # modified manually. See /Users/marcoroth/Development/herb-release-4/templates/sig/serialized_ast_nodes.rbs.erb
5
+ # modified manually. See /Users/marcoroth/Development/herb-release-7/templates/sig/serialized_ast_nodes.rbs.erb
6
6
 
7
7
  module Herb
8
8
  type serialized_document_node = serialized_node & {
data/src/analyze.c CHANGED
@@ -39,7 +39,7 @@ static analyzed_ruby_T* herb_analyze_ruby(char* source) {
39
39
  search_in_nodes(analyzed);
40
40
  search_rescue_nodes(analyzed);
41
41
  search_ensure_nodes(analyzed);
42
- search_yield_nodes(analyzed);
42
+ search_yield_nodes(analyzed->root, analyzed);
43
43
  search_block_closing_nodes(analyzed);
44
44
 
45
45
  return analyzed;
@@ -95,8 +95,13 @@ static control_type_t detect_control_type(AST_ERB_CONTENT_NODE_T* erb_node) {
95
95
 
96
96
  if (!ruby) { return CONTROL_TYPE_UNKNOWN; }
97
97
 
98
- if (ruby->valid) { return CONTROL_TYPE_UNKNOWN; }
98
+ if (ruby->valid) {
99
+ if (has_yield_node(ruby)) { return CONTROL_TYPE_YIELD; }
100
+ return CONTROL_TYPE_UNKNOWN;
101
+ }
99
102
 
103
+ if (has_yield_node(ruby)) { return CONTROL_TYPE_YIELD; }
104
+ if (has_block_node(ruby)) { return CONTROL_TYPE_BLOCK; }
100
105
  if (has_if_node(ruby)) { return CONTROL_TYPE_IF; }
101
106
  if (has_elsif_node(ruby)) { return CONTROL_TYPE_ELSIF; }
102
107
  if (has_else_node(ruby)) { return CONTROL_TYPE_ELSE; }
@@ -112,8 +117,6 @@ static control_type_t detect_control_type(AST_ERB_CONTENT_NODE_T* erb_node) {
112
117
  if (has_while_node(ruby)) { return CONTROL_TYPE_WHILE; }
113
118
  if (has_until_node(ruby)) { return CONTROL_TYPE_UNTIL; }
114
119
  if (has_for_node(ruby)) { return CONTROL_TYPE_FOR; }
115
- if (has_block_node(ruby)) { return CONTROL_TYPE_BLOCK; }
116
- if (has_yield_node(ruby)) { return CONTROL_TYPE_YIELD; }
117
120
  if (has_block_closing(ruby)) { return CONTROL_TYPE_BLOCK_CLOSE; }
118
121
 
119
122
  return CONTROL_TYPE_UNKNOWN;
@@ -1020,10 +1023,22 @@ static array_T* rewrite_node_array(AST_NODE_T* node, array_T* array, analyze_rub
1020
1023
  case CONTROL_TYPE_UNTIL:
1021
1024
  case CONTROL_TYPE_FOR:
1022
1025
  case CONTROL_TYPE_BLOCK:
1023
- case CONTROL_TYPE_YIELD:
1024
1026
  index = process_control_structure(node, array, index, new_array, context, type);
1025
1027
  continue;
1026
1028
 
1029
+ case CONTROL_TYPE_YIELD: {
1030
+ AST_NODE_T* yield_node = create_control_node(erb_node, array_init(8), NULL, NULL, type);
1031
+
1032
+ if (yield_node) {
1033
+ array_append(new_array, yield_node);
1034
+ } else {
1035
+ array_append(new_array, item);
1036
+ }
1037
+
1038
+ index++;
1039
+ break;
1040
+ }
1041
+
1027
1042
  default:
1028
1043
  array_append(new_array, item);
1029
1044
  index++;
@@ -279,10 +279,14 @@ bool search_ensure_nodes(analyzed_ruby_T* analyzed) {
279
279
  return false;
280
280
  }
281
281
 
282
- bool search_yield_nodes(analyzed_ruby_T* analyzed) {
283
- if (has_error_message(analyzed, "Invalid yield")) {
282
+ bool search_yield_nodes(const pm_node_t* node, void* data) {
283
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
284
+
285
+ if (node->type == PM_YIELD_NODE) {
284
286
  analyzed->has_yield_node = true;
285
287
  return true;
288
+ } else {
289
+ pm_visit_child_nodes(node, search_yield_nodes, analyzed);
286
290
  }
287
291
 
288
292
  return false;
data/src/ast_nodes.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/src/ast_nodes.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_nodes.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
  #include <stdbool.h>
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/src/ast_pretty_print.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/ast_pretty_print.c.erb
3
3
 
4
4
  #include "include/ast_node.h"
5
5
  #include "include/ast_nodes.h"
data/src/errors.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/src/errors.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/errors.c.erb
3
3
 
4
4
  #include "include/array.h"
5
5
  #include "include/errors.h"
@@ -44,6 +44,6 @@ bool search_when_nodes(analyzed_ruby_T* analyzed);
44
44
  bool search_in_nodes(analyzed_ruby_T* analyzed);
45
45
  bool search_rescue_nodes(analyzed_ruby_T* analyzed);
46
46
  bool search_ensure_nodes(analyzed_ruby_T* analyzed);
47
- bool search_yield_nodes(analyzed_ruby_T* analyzed);
47
+ bool search_yield_nodes(const pm_node_t* node, void* data);
48
48
 
49
49
  #endif
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/src/include/ast_nodes.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_nodes.h.erb
3
3
 
4
4
  #ifndef HERB_AST_NODES_H
5
5
  #define HERB_AST_NODES_H
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/src/include/ast_pretty_print.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/ast_pretty_print.h.erb
3
3
 
4
4
  #ifndef HERB_AST_PRETTY_PRINT_H
5
5
  #define HERB_AST_PRETTY_PRINT_H
data/src/include/errors.h CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/src/include/errors.h.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/include/errors.h.erb
3
3
 
4
4
  #ifndef HERB_ERRORS_H
5
5
  #define HERB_ERRORS_H
@@ -0,0 +1,11 @@
1
+ #ifndef HERB_UTF8_H
2
+ #define HERB_UTF8_H
3
+
4
+ #include <stdbool.h>
5
+ #include <stdlib.h>
6
+
7
+ int utf8_char_byte_length(unsigned char first_byte);
8
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length);
9
+ bool utf8_is_valid_continuation_byte(unsigned char byte);
10
+
11
+ #endif
@@ -1,6 +1,6 @@
1
1
  #ifndef HERB_VERSION_H
2
2
  #define HERB_VERSION_H
3
3
 
4
- #define HERB_VERSION "0.4.1"
4
+ #define HERB_VERSION "0.4.3"
5
5
 
6
6
  #endif
data/src/lexer.c CHANGED
@@ -1,6 +1,7 @@
1
1
  #include "include/buffer.h"
2
2
  #include "include/lexer_peek_helpers.h"
3
3
  #include "include/token.h"
4
+ #include "include/utf8.h"
4
5
  #include "include/util.h"
5
6
 
6
7
  #include <ctype.h>
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
84
85
  }
85
86
  }
86
87
 
88
+ static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
89
+ if (byte_count <= 0) { return; }
90
+
91
+ if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
92
+ if (!is_newline(lexer->current_character)) { lexer->current_column++; }
93
+
94
+ lexer->current_position += byte_count;
95
+
96
+ if (lexer->current_position >= lexer->source_length) {
97
+ lexer->current_position = lexer->source_length;
98
+ lexer->current_character = '\0';
99
+ } else {
100
+ lexer->current_character = lexer->source[lexer->current_position];
101
+ }
102
+ }
103
+ }
104
+
87
105
  static void lexer_advance_by(lexer_T* lexer, const size_t count) {
88
106
  for (size_t i = 0; i < count; i++) {
89
107
  lexer_advance(lexer);
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
116
134
  return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
117
135
  }
118
136
 
137
+ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
138
+ int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
139
+
140
+ if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
141
+
142
+ char* utf8_char = malloc(char_byte_length + 1);
143
+
144
+ if (!utf8_char) { return lexer_advance_current(lexer, type); }
145
+
146
+ for (int i = 0; i < char_byte_length; i++) {
147
+ if (lexer->current_position + i >= lexer->source_length) {
148
+ free(utf8_char);
149
+ return lexer_advance_current(lexer, type);
150
+ }
151
+
152
+ utf8_char[i] = lexer->source[lexer->current_position + i];
153
+ }
154
+
155
+ utf8_char[char_byte_length] = '\0';
156
+
157
+ lexer_advance_utf8_bytes(lexer, char_byte_length);
158
+
159
+ token_T* token = token_init(utf8_char, type, lexer);
160
+
161
+ free(utf8_char);
162
+
163
+ return token;
164
+ }
165
+
119
166
  static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
120
167
  if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
121
168
  return lexer_advance_with(lexer, value, type);
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
232
279
  if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
233
280
 
234
281
  if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
235
- return lexer_advance_with(lexer, "\xC2\xA0", TOKEN_NBSP);
282
+ return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
236
283
  }
237
284
 
238
285
  switch (lexer->current_character) {
@@ -282,7 +329,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
282
329
  default: {
283
330
  if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
284
331
 
285
- return lexer_advance_current(lexer, TOKEN_CHARACTER);
332
+ return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
286
333
  }
287
334
  }
288
335
  }
data/src/parser.c CHANGED
@@ -9,6 +9,7 @@
9
9
  #include "include/parser_helpers.h"
10
10
  #include "include/token.h"
11
11
  #include "include/token_matchers.h"
12
+ #include "include/util.h"
12
13
 
13
14
  #include <stdio.h>
14
15
  #include <stdlib.h>
@@ -184,14 +185,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
184
185
 
185
186
  static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
186
187
  array_T* errors = array_init(8);
187
- token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
188
188
 
189
- if (identifier == NULL) { parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors); }
189
+ token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
190
+ token_T* first_token = NULL;
191
+
192
+ if (at_token != NULL) {
193
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
194
+
195
+ if (first_token == NULL) {
196
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
197
+
198
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
199
+ ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
200
+
201
+ token_free(at_token);
202
+
203
+ return attribute_name;
204
+ }
205
+ } else {
206
+ first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
207
+
208
+ if (first_token == NULL) {
209
+ parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
210
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
211
+ return attribute_name;
212
+ }
213
+ }
214
+
215
+ buffer_T name_buffer = buffer_new();
216
+
217
+ position_T* start_position;
218
+
219
+ if (at_token != NULL) {
220
+ buffer_append(&name_buffer, at_token->value);
221
+ start_position = position_copy(at_token->location->start);
222
+ } else {
223
+ start_position = position_copy(first_token->location->start);
224
+ }
225
+
226
+ buffer_append(&name_buffer, first_token->value);
227
+
228
+ position_T* end_position = position_copy(first_token->location->end);
229
+ size_t range_end = first_token->range->to;
230
+
231
+ while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
232
+ && strcmp(parser->current_token->value, ".") == 0) {
233
+
234
+ token_T* dot_token = parser_advance(parser);
235
+
236
+ buffer_append(&name_buffer, dot_token->value);
237
+ position_free(end_position);
238
+
239
+ end_position = position_copy(dot_token->location->end);
240
+ range_end = dot_token->range->to;
241
+
242
+ token_free(dot_token);
243
+
244
+ if (parser->current_token->type == TOKEN_IDENTIFIER) {
245
+ token_T* next_identifier = parser_advance(parser);
246
+
247
+ buffer_append(&name_buffer, next_identifier->value);
248
+ position_free(end_position);
249
+
250
+ end_position = position_copy(next_identifier->location->end);
251
+ range_end = next_identifier->range->to;
252
+ token_free(next_identifier);
253
+ } else {
254
+ break;
255
+ }
256
+ }
257
+
258
+ token_T* combined_token = calloc(1, sizeof(token_T));
259
+ combined_token->value = herb_strdup(name_buffer.value);
260
+ combined_token->type = TOKEN_IDENTIFIER;
261
+ combined_token->location =
262
+ location_from(start_position->line, start_position->column, end_position->line, end_position->column);
263
+
264
+ size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
265
+ combined_token->range = range_init(range_start, range_end);
190
266
 
191
267
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
192
- ast_html_attribute_name_node_init(identifier, identifier->location->start, identifier->location->end, errors);
268
+ ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
193
269
 
194
- token_free(identifier);
270
+ buffer_free(&name_buffer);
271
+ position_free(start_position);
272
+ position_free(end_position);
273
+ token_free(first_token);
274
+
275
+ if (at_token != NULL) { token_free(at_token); }
276
+
277
+ token_free(combined_token);
195
278
 
196
279
  return attribute_name;
197
280
  }
@@ -390,10 +473,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
390
473
  continue;
391
474
  }
392
475
 
476
+ if (parser->current_token->type == TOKEN_AT) {
477
+ array_append(children, parser_parse_html_attribute(parser));
478
+ continue;
479
+ }
480
+
393
481
  parser_append_unexpected_error(
394
482
  parser,
395
483
  "Unexpected Token",
396
- "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
484
+ "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
397
485
  errors
398
486
  );
399
487
  }
@@ -441,6 +529,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
441
529
 
442
530
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
443
531
  token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
532
+
533
+ while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
534
+ token_T* whitespace = parser_advance(parser);
535
+ token_free(whitespace);
536
+ }
537
+
444
538
  token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
445
539
 
446
540
  if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
@@ -618,12 +712,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
618
712
  if (token_is_any_of(
619
713
  parser,
620
714
  TOKEN_AMPERSAND,
715
+ TOKEN_AT,
621
716
  TOKEN_CHARACTER,
622
717
  TOKEN_COLON,
623
718
  TOKEN_DASH,
624
719
  TOKEN_EQUALS,
625
720
  TOKEN_EXCLAMATION,
626
721
  TOKEN_IDENTIFIER,
722
+ TOKEN_NBSP,
627
723
  TOKEN_NEWLINE,
628
724
  TOKEN_PERCENT,
629
725
  TOKEN_QUOTE,
@@ -639,8 +735,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
639
735
  parser_append_unexpected_error(
640
736
  parser,
641
737
  "Unexpected token",
642
- "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, or "
643
- "TOKEN_NEWLINE",
738
+ "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
739
+ "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
644
740
  errors
645
741
  );
646
742
  }
data/src/utf8.c ADDED
@@ -0,0 +1,46 @@
1
+ #include "include/utf8.h"
2
+
3
+ // UTF-8 byte patterns:
4
+ // 0xxxxxxx = 1 byte (ASCII)
5
+ // 110xxxxx = 2 bytes
6
+ // 1110xxxx = 3 bytes
7
+ // 11110xxx = 4 bytes
8
+ int utf8_char_byte_length(unsigned char first_byte) {
9
+ if ((first_byte & 0x80) == 0) {
10
+ return 1;
11
+ } else if ((first_byte & 0xE0) == 0xC0) {
12
+ return 2;
13
+ } else if ((first_byte & 0xF0) == 0xE0) {
14
+ return 3;
15
+ } else if ((first_byte & 0xF8) == 0xF0) {
16
+ return 4;
17
+ }
18
+
19
+ return 1;
20
+ }
21
+
22
+ // Continuation bytes have pattern 10xxxxxx
23
+ bool utf8_is_valid_continuation_byte(unsigned char byte) {
24
+ return (byte & 0xC0) == 0x80;
25
+ }
26
+
27
+ int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
28
+ if (position >= max_length) { return 0; }
29
+
30
+ unsigned char first_byte = (unsigned char) str[position];
31
+ int expected_length = utf8_char_byte_length(first_byte);
32
+
33
+ if (position + expected_length > max_length) {
34
+ return 1; // Not enough bytes, treat as single byte
35
+ }
36
+
37
+ if (expected_length > 1) {
38
+ for (int i = 1; i < expected_length; i++) {
39
+ if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
40
+ return 1; // Invalid continuation byte, treat first byte as single byte
41
+ }
42
+ }
43
+ }
44
+
45
+ return expected_length;
46
+ }
data/src/visitor.c CHANGED
@@ -1,5 +1,5 @@
1
1
  // NOTE: This file is generated by the templates/template.rb script and should not
2
- // be modified manually. See /Users/marcoroth/Development/herb-release-4/templates/src/visitor.c.erb
2
+ // be modified manually. See /Users/marcoroth/Development/herb-release-7/templates/src/visitor.c.erb
3
3
 
4
4
  #include <stdio.h>
5
5
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: herb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.3
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Marco Roth
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-07-22 00:00:00.000000000 Z
10
+ date: 2025-08-03 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: Powerful and seamless HTML-aware ERB parsing and tooling.
13
13
  email:
@@ -125,6 +125,7 @@ files:
125
125
  - src/include/token.h
126
126
  - src/include/token_matchers.h
127
127
  - src/include/token_struct.h
128
+ - src/include/utf8.h
128
129
  - src/include/util.h
129
130
  - src/include/version.h
130
131
  - src/include/visitor.h
@@ -144,6 +145,7 @@ files:
144
145
  - src/ruby_parser.c
145
146
  - src/token.c
146
147
  - src/token_matchers.c
148
+ - src/utf8.c
147
149
  - src/util.c
148
150
  - src/visitor.c
149
151
  homepage: https://herb-tools.dev