RubyGems - herb - Versions diffs - 0.4.2-arm-linux-gnu → 0.5.0-arm-linux-gnu - Mend

herb 0.4.2-arm-linux-gnu → 0.5.0-arm-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

checksums.yaml +4 -4
data/README.md +1 -1
data/ext/herb/error_helpers.c +1 -1
data/ext/herb/error_helpers.h +1 -1
data/ext/herb/nodes.c +2 -2
data/ext/herb/nodes.h +1 -1
data/lib/herb/3.0/herb.so +0 -0
data/lib/herb/3.1/herb.so +0 -0
data/lib/herb/3.2/herb.so +0 -0
data/lib/herb/3.3/herb.so +0 -0
data/lib/herb/3.4/herb.so +0 -0
data/lib/herb/ast/nodes.rb +1 -1
data/lib/herb/cli.rb +2 -2
data/lib/herb/errors.rb +1 -1
data/lib/herb/project.rb +2 -0
data/lib/herb/version.rb +1 -1
data/lib/herb/visitor.rb +1 -1
data/sig/serialized_ast_errors.rbs +1 -1
data/sig/serialized_ast_nodes.rbs +1 -1
data/src/analyze.c +2 -1
data/src/ast_nodes.c +1 -1
data/src/ast_pretty_print.c +1 -1
data/src/errors.c +1 -1
data/src/include/ast_nodes.h +1 -1
data/src/include/ast_pretty_print.h +1 -1
data/src/include/errors.h +1 -1
data/src/include/parser.h +12 -0
data/src/include/parser_helpers.h +9 -0
data/src/include/token_struct.h +1 -0
data/src/include/utf8.h +11 -0
data/src/include/version.h +1 -1
data/src/lexer.c +50 -2
data/src/parser.c +224 -8
data/src/parser_helpers.c +46 -0
data/src/token.c +1 -0
data/src/utf8.c +46 -0
data/src/visitor.c +1 -1
metadata +4 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f27aaaad81ad225f5316278ca8a3c2412d624597d2a28e7eb5cafce3db32316e
-  data.tar.gz: d6c58ecbfed32f6c6afda64e1956d24f8b354bff4647ec6b01c93846742e51c5
+  metadata.gz: 58698840c9bcb8e894eb66edacdd95511702fd1afb0f63995aca04460dae9144
+  data.tar.gz: d932daf7cbc0e0e14f885fbf7a70b097d48361d0108ba7cd85a70948d3471c2f
 SHA512:
-  metadata.gz: 871f6d51b0eb6be8d052b4ede53807e598a4e65a2b1d94139b2e834f731d11b136624dbe1e9351cc357e100afb1b98f4fe447b114b45328a51d6858747b6d2d2
-  data.tar.gz: 8ad246404e5f6e1e3c6926bfba00a7ea99860d82e8824b0290c29123edc71a11ab0a52801f196819b4e714754ec0a985d28f6f1151ae47987a7d19a0380b1b06
+  metadata.gz: a49a007afc27785a6d96c959b1467303d0c81130cdc6e10e8713baa8f240d36765dc655c22a75e446291f0d4d8addead9ffce2fd09af7589fc64f19c50e5ea5b
+  data.tar.gz: e0ea0d65b094d779ce93237fcf19098a7da5149b141fc1743022cde10f94a6b75aea0e2896d15704be9a103c26ba2412b27026b855070d5179df838e7add9523

data/README.md CHANGED Viewed

@@ -37,7 +37,7 @@ Herb provides a complete ecosystem of HTML+ERB tooling, designed to simplify and
   Automatic, consistent formatting for HTML+ERB files, reducing manual styling and enforcing a standard across projects. Currently in experimental preview - use with caution on version-controlled files.
 - **Herb Linter** ([available now](https://herb-tools.dev/projects/linter)):
-  Static analysis for your HTML+ERB templates to enforce best practices and quickly identify common mistakes with 17 configurable rules.
+  Static analysis for your HTML+ERB templates to enforce best practices and quickly identify common mistakes with plenty of rules.
 You can use Herb programmatically in **Ruby**, as well as in **JavaScript** via Node.js, WebAssembly, or directly in browsers.

data/ext/herb/error_helpers.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/error_helpers.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/error_helpers.c.erb
 #include <ruby.h>

data/ext/herb/error_helpers.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/error_helpers.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/error_helpers.h.erb
 #ifndef HERB_EXTENSION_ERROR_HELPERS_H
 #define HERB_EXTENSION_ERROR_HELPERS_H

data/ext/herb/nodes.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/nodes.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/nodes.c.erb
 #include <ruby.h>
@@ -415,7 +415,7 @@ static VALUE rb_erb_content_node_from_c_struct(AST_ERB_CONTENT_NODE_T* erb_conte
   VALUE erb_content_node_tag_opening = rb_token_from_c_struct(erb_content_node->tag_opening);
   VALUE erb_content_node_content = rb_token_from_c_struct(erb_content_node->content);
   VALUE erb_content_node_tag_closing = rb_token_from_c_struct(erb_content_node->tag_closing);
-  /* #<Herb::Template::AnalyzedRubyField:0x00007fffe335ba20 @name="analyzed_ruby", @options={kind: nil}> */
+  /* #<Herb::Template::AnalyzedRubyField:0x00007fffe3389178 @name="analyzed_ruby", @options={kind: nil}> */
   VALUE erb_content_node_analyzed_ruby = Qnil;
   VALUE erb_content_node_parsed = (erb_content_node->parsed) ? Qtrue : Qfalse;
   VALUE erb_content_node_valid = (erb_content_node->valid) ? Qtrue : Qfalse;

data/ext/herb/nodes.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/ext/herb/nodes.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/ext/herb/nodes.h.erb
 #ifndef HERB_EXTENSION_NODES_H
 #define HERB_EXTENSION_NODES_H

data/lib/herb/3.0/herb.so CHANGED Viewed

Binary file

data/lib/herb/3.1/herb.so CHANGED Viewed

Binary file

data/lib/herb/3.2/herb.so CHANGED Viewed

Binary file

data/lib/herb/3.3/herb.so CHANGED Viewed

Binary file

data/lib/herb/3.4/herb.so CHANGED Viewed

Binary file

data/lib/herb/ast/nodes.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 # typed: true
 # NOTE: This file is generated by the templates/template.rb script and should not be
-# modified manually. See /Users/marcoroth/Development/herb-release-6/templates/lib/herb/ast/nodes.rb.erb
+# modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/ast/nodes.rb.erb
 module Herb
   module AST

data/lib/herb/cli.rb CHANGED Viewed

@@ -110,8 +110,8 @@ class Herb::CLI
                   project.no_interactive = no_interactive
                   project.no_log_file = no_log_file
                   project.no_timing = no_timing
-                  project.parse!
-                  exit(0)
+                  has_issues = project.parse!
+                  exit(has_issues ? 1 : 0)
                 when "parse"
                   Herb.parse(file_content)
                 when "lex"

data/lib/herb/errors.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 # typed: true
 # NOTE: This file is generated by the templates/template.rb script and should not be
-# modified manually. See /Users/marcoroth/Development/herb-release-6/templates/lib/herb/errors.rb.erb
+# modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/errors.rb.erb
 module Herb
   module Errors

data/lib/herb/project.rb CHANGED Viewed

@@ -366,6 +366,8 @@ module Herb
         end
         puts "\nResults saved to #{output_file}" unless no_log_file
+        problem_files.any?
       ensure
         log.close unless no_log_file
       end

data/lib/herb/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 # typed: true
 module Herb
-  VERSION = "0.4.2"
+  VERSION = "0.5.0"
 end

data/lib/herb/visitor.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 # typed: true
 # NOTE: This file is generated by the templates/template.rb script and should not be
-# modified manually. See /Users/marcoroth/Development/herb-release-6/templates/lib/herb/visitor.rb.erb
+# modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/lib/herb/visitor.rb.erb
 module Herb
   class Visitor

data/sig/serialized_ast_errors.rbs CHANGED Viewed

@@ -2,7 +2,7 @@
 # typed: true
 # NOTE: This file is generated by the templates/template.rb script and should not be
-# modified manually. See /Users/marcoroth/Development/herb-release-6/templates/sig/serialized_ast_errors.rbs.erb
+# modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/sig/serialized_ast_errors.rbs.erb
 module Herb
   type serialized_unexpected_error = serialized_error & {

data/sig/serialized_ast_nodes.rbs CHANGED Viewed

@@ -2,7 +2,7 @@
 # typed: true
 # NOTE: This file is generated by the templates/template.rb script and should not be
-# modified manually. See /Users/marcoroth/Development/herb-release-6/templates/sig/serialized_ast_nodes.rbs.erb
+# modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/sig/serialized_ast_nodes.rbs.erb
 module Herb
   type serialized_document_node = serialized_node & {

data/src/analyze.c CHANGED Viewed

@@ -50,7 +50,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) {
     AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;
     const char* opening = erb_content_node->tag_opening->value;
-    if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0) {
+    if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
       analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);
       if (false) { pretty_print_analyed_ruby(analyzed, erb_content_node->content->value); }

data/src/ast_nodes.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_nodes.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_nodes.c.erb
 #include <stdio.h>
 #include <stdbool.h>

data/src/ast_pretty_print.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/ast_pretty_print.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/ast_pretty_print.c.erb
 #include "include/ast_node.h"
 #include "include/ast_nodes.h"

data/src/errors.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/errors.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/errors.c.erb
 #include "include/array.h"
 #include "include/errors.h"

data/src/include/ast_nodes.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_nodes.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_nodes.h.erb
 #ifndef HERB_AST_NODES_H
 #define HERB_AST_NODES_H

data/src/include/ast_pretty_print.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/ast_pretty_print.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
 #ifndef HERB_AST_PRETTY_PRINT_H
 #define HERB_AST_PRETTY_PRINT_H

data/src/include/errors.h CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/include/errors.h.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
 #ifndef HERB_ERRORS_H
 #define HERB_ERRORS_H

data/src/include/parser.h CHANGED Viewed

@@ -5,10 +5,22 @@
 #include "ast_node.h"
 #include "lexer.h"
+// Kind of element whose body the parser consumes as foreign content.
+// FOREIGN_CONTENT_UNKNOWN (0) is also the "not in foreign content" value: it is
+// what parser_init() and parser_exit_foreign_content() store in the parser.
+typedef enum {
+  FOREIGN_CONTENT_UNKNOWN = 0,
+  FOREIGN_CONTENT_SCRIPT, // selected for the "script" tag name
+  FOREIGN_CONTENT_STYLE,  // selected for the "style" tag name
+  // FOREIGN_CONTENT_RUBY,
+  // FOREIGN_CONTENT_TEMPLATE
+} foreign_content_type_T;
+// Parser mode: PARSER_STATE_DATA is the initial mode set by parser_init();
+// PARSER_STATE_FOREIGN_CONTENT is set by parser_enter_foreign_content().
+typedef enum { PARSER_STATE_DATA, PARSER_STATE_FOREIGN_CONTENT } parser_state_T;
+// Parser state threaded through every parser_* routine.
 typedef struct PARSER_STRUCT {
+  // Token source; parser_init() pulls the first token from it.
   lexer_T* lexer;
+  // Token currently under inspection (initialised from lexer_next_token()).
   token_T* current_token;
+  // Created with array_init(16) in parser_init(); presumably the stack of
+  // currently open tags maintained by parser_push_open_tag() -- TODO confirm.
   array_T* open_tags_stack;
+  // Current mode; starts as PARSER_STATE_DATA.
+  parser_state_T state;
+  // Foreign-content kind being parsed; FOREIGN_CONTENT_UNKNOWN outside of it.
+  foreign_content_type_T foreign_content_type;
 } parser_T;
 parser_T* parser_init(lexer_T* lexer);

data/src/include/parser_helpers.h CHANGED Viewed

@@ -24,6 +24,15 @@ void parser_append_literal_node_from_buffer(
 bool parser_in_svg_context(const parser_T* parser);
+// Maps a tag name to its foreign-content kind. "script" and "style" are matched
+// case-insensitively; NULL or any other name yields FOREIGN_CONTENT_UNKNOWN.
+foreign_content_type_T parser_get_foreign_content_type(const char* tag_name);
+// True when parser_get_foreign_content_type() returns anything other than
+// FOREIGN_CONTENT_UNKNOWN for tag_name.
+bool parser_is_foreign_content_tag(const char* tag_name);
+// Returns the lowercase tag name that closes the given kind ("script" or
+// "style"), or NULL for any other value. The result is a string literal.
+const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type);
+// Switches the parser to PARSER_STATE_FOREIGN_CONTENT and records the kind.
+// A NULL parser is ignored.
+void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type);
+// Switches the parser back to PARSER_STATE_DATA and resets the kind to
+// FOREIGN_CONTENT_UNKNOWN. A NULL parser is ignored.
+void parser_exit_foreign_content(parser_T* parser);
+// True when tag_name matches the closing tag name of expected_type; false when
+// tag_name is NULL or expected_type has no closing tag name.
+bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type);
 token_T* parser_advance(parser_T* parser);
 token_T* parser_consume_if_present(parser_T* parser, token_type_T type);
 token_T* parser_consume_expected(parser_T* parser, token_type_T type, array_T* array);

data/src/include/token_struct.h CHANGED Viewed

@@ -28,6 +28,7 @@ typedef enum {
   TOKEN_SLASH,       // /
   TOKEN_EQUALS,      // =
   TOKEN_QUOTE,       // ", '
+  TOKEN_BACKTICK,    // `
   TOKEN_DASH,        // -
   TOKEN_UNDERSCORE,  // _
   TOKEN_EXCLAMATION, // !

data/src/include/utf8.h ADDED Viewed

@@ -0,0 +1,11 @@
+// UTF-8 helpers used by the lexer to step over multi-byte characters.
+#ifndef HERB_UTF8_H
+#define HERB_UTF8_H
+#include <stdbool.h>
+#include <stdlib.h>
+// Number of bytes (1-4) announced by the lead byte of a UTF-8 sequence.
+// Bytes that are not a recognised lead byte are reported as length 1.
+int utf8_char_byte_length(unsigned char first_byte);
+// Length in bytes of the UTF-8 sequence starting at str[position], where
+// max_length is the number of readable bytes in str. Returns 0 when position is
+// at or past max_length, and 1 when the sequence is truncated or contains an
+// invalid continuation byte.
+int utf8_sequence_length(const char* str, size_t position, size_t max_length);
+// True when byte has the continuation-byte bit pattern 10xxxxxx.
+bool utf8_is_valid_continuation_byte(unsigned char byte);
+#endif

data/src/include/version.h CHANGED Viewed

@@ -1,6 +1,6 @@
 #ifndef HERB_VERSION_H
 #define HERB_VERSION_H
-#define HERB_VERSION "0.4.2"
+#define HERB_VERSION "0.5.0"
 #endif

data/src/lexer.c CHANGED Viewed

@@ -1,6 +1,7 @@
 #include "include/buffer.h"
 #include "include/lexer_peek_helpers.h"
 #include "include/token.h"
+#include "include/utf8.h"
 #include "include/util.h"
 #include <ctype.h>
@@ -84,6 +85,23 @@ static void lexer_advance(lexer_T* lexer) {
   }
 }
+// Steps the lexer over one character that occupies `byte_count` bytes.
+// The column moves by a single unit (not at all when the current character is
+// a newline) while the byte position moves by `byte_count`, clamped to the end
+// of the source. A non-positive count or an exhausted lexer is a no-op.
+static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
+  if (byte_count <= 0) { return; }
+  if (!lexer_has_more_characters(lexer) || lexer_eof(lexer)) { return; }
+  if (!is_newline(lexer->current_character)) { lexer->current_column++; }
+  lexer->current_position += byte_count;
+  // Still inside the source: load the byte the lexer now points at.
+  if (lexer->current_position < lexer->source_length) {
+    lexer->current_character = lexer->source[lexer->current_position];
+    return;
+  }
+  // Ran off the end: pin the position to the end and expose a NUL character.
+  lexer->current_position = lexer->source_length;
+  lexer->current_character = '\0';
+}
 static void lexer_advance_by(lexer_T* lexer, const size_t count) {
   for (size_t i = 0; i < count; i++) {
     lexer_advance(lexer);
@@ -116,6 +134,35 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
   return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
 }
+// Longest UTF-8 encoding of a single code point, in bytes.
+enum { LEXER_UTF8_MAX_BYTES = 4 };
+// Emits one token of `type` whose value is the complete UTF-8 sequence at the
+// current position, and advances the lexer past all of its bytes.
+//
+// Falls back to the single-byte lexer_advance_current() path when the sequence
+// is one byte long (ASCII, truncated or malformed input), when the reported
+// length would not fit the local buffer, or when it would run past the source.
+//
+// The bytes are staged in a fixed stack buffer rather than on the heap: the
+// sequence is at most LEXER_UTF8_MAX_BYTES long, so no allocation (and no
+// allocation-failure path that would split the character) is needed. The
+// buffer only has to outlive token_init(); the original code likewise released
+// its buffer immediately after that call.
+static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
+  const int char_byte_length = utf8_sequence_length(lexer->source, lexer->current_position, lexer->source_length);
+  if (char_byte_length <= 1 || char_byte_length > LEXER_UTF8_MAX_BYTES) { return lexer_advance_current(lexer, type); }
+  // Defensive: never read beyond the end of the source buffer.
+  if (lexer->current_position + char_byte_length > lexer->source_length) {
+    return lexer_advance_current(lexer, type);
+  }
+  char utf8_char[LEXER_UTF8_MAX_BYTES + 1];
+  for (int i = 0; i < char_byte_length; i++) {
+    utf8_char[i] = lexer->source[lexer->current_position + i];
+  }
+  utf8_char[char_byte_length] = '\0';
+  // Advance first, then build the token, matching the original ordering.
+  lexer_advance_utf8_bytes(lexer, char_byte_length);
+  return token_init(utf8_char, type, lexer);
+}
 static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
   if (strncmp(lexer->source + lexer->current_position, value, strlen(value)) == 0) {
     return lexer_advance_with(lexer, value, type);
@@ -232,7 +279,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
   if (isspace(lexer->current_character)) { return lexer_parse_whitespace(lexer); }
   if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
-    return lexer_advance_with(lexer, "\xC2\xA0", TOKEN_NBSP);
+    return lexer_advance_utf8_character(lexer, TOKEN_NBSP);
   }
   switch (lexer->current_character) {
@@ -278,11 +325,12 @@ token_T* lexer_next_token(lexer_T* lexer) {
     case '"':
     case '\'': return lexer_advance_current(lexer, TOKEN_QUOTE);
+    case '`': return lexer_advance_current(lexer, TOKEN_BACKTICK);
     default: {
       if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
-      return lexer_advance_current(lexer, TOKEN_CHARACTER);
+      return lexer_advance_utf8_character(lexer, TOKEN_CHARACTER);
     }
   }
 }

data/src/parser.c CHANGED Viewed

@@ -9,6 +9,7 @@
 #include "include/parser_helpers.h"
 #include "include/token.h"
 #include "include/token_matchers.h"
+#include "include/util.h"
 #include <stdio.h>
 #include <stdlib.h>
@@ -16,6 +17,7 @@
 #include <strings.h>
 static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
+static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors);
 static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
 size_t parser_sizeof(void) {
@@ -28,6 +30,8 @@ parser_T* parser_init(lexer_T* lexer) {
   parser->lexer = lexer;
   parser->current_token = lexer_next_token(lexer);
   parser->open_tags_stack = array_init(16);
+  parser->state = PARSER_STATE_DATA;
+  parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
   return parser;
 }
@@ -184,14 +188,96 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
+// Parses an attribute name of the form [@]identifier(.identifier)* and returns
+// it as a single attribute-name node whose token value is the concatenation of
+// all consumed pieces. Errors are collected in a fresh array handed to the node.
 static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
   array_T* errors = array_init(8);
-  token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
-  if (identifier == NULL) { parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors); }
+  // Optional leading '@', then a mandatory identifier.
+  token_T* at_token = parser_consume_if_present(parser, TOKEN_AT);
+  token_T* first_token = NULL;
+  if (at_token != NULL) {
+    first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
+    if (first_token == NULL) {
+      // '@' with no identifier after it: report the error and build the node
+      // from the '@' token alone.
+      parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
+      AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
+        ast_html_attribute_name_node_init(at_token, at_token->location->start, at_token->location->end, errors);
+      token_free(at_token);
+      return attribute_name;
+    }
+  } else {
+    first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
+    if (first_token == NULL) {
+      // Nothing usable at all: report the error and return a node with no
+      // token and no positions.
+      parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors);
+      AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = ast_html_attribute_name_node_init(NULL, NULL, NULL, errors);
+      return attribute_name;
+    }
+  }
+  // Accumulate the full name text and track the span it covers.
+  buffer_T name_buffer = buffer_new();
+  position_T* start_position;
+  if (at_token != NULL) {
+    buffer_append(&name_buffer, at_token->value);
+    start_position = position_copy(at_token->location->start);
+  } else {
+    start_position = position_copy(first_token->location->start);
+  }
+  buffer_append(&name_buffer, first_token->value);
+  position_T* end_position = position_copy(first_token->location->end);
+  size_t range_end = first_token->range->to;
+  // Extend the name across ".identifier" segments. A trailing '.' with no
+  // identifier after it is still appended before the loop stops.
+  while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
+         && strcmp(parser->current_token->value, ".") == 0) {
+    token_T* dot_token = parser_advance(parser);
+    buffer_append(&name_buffer, dot_token->value);
+    position_free(end_position);
+    end_position = position_copy(dot_token->location->end);
+    range_end = dot_token->range->to;
+    token_free(dot_token);
+    if (parser->current_token->type == TOKEN_IDENTIFIER) {
+      token_T* next_identifier = parser_advance(parser);
+      buffer_append(&name_buffer, next_identifier->value);
+      position_free(end_position);
+      end_position = position_copy(next_identifier->location->end);
+      range_end = next_identifier->range->to;
+      token_free(next_identifier);
+    } else {
+      break;
+    }
+  }
+  // Synthesise one identifier token spanning everything consumed above.
+  // NOTE(review): the calloc() result is dereferenced without a NULL check.
+  token_T* combined_token = calloc(1, sizeof(token_T));
+  combined_token->value = herb_strdup(name_buffer.value);
+  combined_token->type = TOKEN_IDENTIFIER;
+  combined_token->location =
+    location_from(start_position->line, start_position->column, end_position->line, end_position->column);
+  size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
+  combined_token->range = range_init(range_start, range_end);
   AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
-    ast_html_attribute_name_node_init(identifier, identifier->location->start, identifier->location->end, errors);
+    ast_html_attribute_name_node_init(combined_token, start_position, end_position, errors);
+  // Everything handed to the node is released here, so the node presumably
+  // keeps its own copies -- TODO confirm against ast_html_attribute_name_node_init().
+  buffer_free(&name_buffer);
+  position_free(start_position);
+  position_free(end_position);
+  token_free(first_token);
-  token_free(identifier);
+  if (at_token != NULL) { token_free(at_token); }
+  token_free(combined_token);
   return attribute_name;
 }
@@ -300,6 +386,30 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
   // <div id="home">
   if (token_is(parser, TOKEN_QUOTE)) { return parser_parse_quoted_html_attribute_value(parser, children, errors); }
+  if (token_is(parser, TOKEN_BACKTICK)) {
+    token_T* token = parser_advance(parser);
+    position_T* start = position_copy(token->location->start);
+    position_T* end = position_copy(token->location->end);
+    append_unexpected_error(
+      "Invalid quote character for HTML attribute",
+      "single quote (') or double quote (\")",
+      "backtick (`)",
+      start,
+      end,
+      errors
+    );
+    AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
+      ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
+    position_free(start);
+    position_free(end);
+    token_free(token);
+    return value;
+  }
   token_T* token = parser_advance(parser);
   append_unexpected_error(
@@ -329,9 +439,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
 static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
   AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
+  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
+    token_T* whitespace = parser_advance(parser);
+    token_free(whitespace);
+  }
   token_T* equals = parser_consume_if_present(parser, TOKEN_EQUALS);
   if (equals != NULL) {
+    while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
+      token_T* whitespace = parser_advance(parser);
+      token_free(whitespace);
+    }
     AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
     AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
@@ -390,10 +510,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
       continue;
     }
+    if (parser->current_token->type == TOKEN_AT) {
+      array_append(children, parser_parse_html_attribute(parser));
+      continue;
+    }
     parser_append_unexpected_error(
       parser,
       "Unexpected Token",
-      "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
+      "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
       errors
     );
   }
@@ -441,6 +566,12 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
   token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
   token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
+  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
+    token_T* whitespace = parser_advance(parser);
+    token_free(whitespace);
+  }
   token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
   if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
@@ -502,7 +633,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
   parser_push_open_tag(parser, open_tag->tag_name);
-  parser_parse_in_data_state(parser, body, errors);
+  if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
+    foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
+    parser_enter_foreign_content(parser, content_type);
+    parser_parse_foreign_content(parser, body, errors);
+  } else {
+    parser_parse_in_data_state(parser, body, errors);
+  }
   if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
@@ -593,6 +730,83 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
   return erb_node;
 }
+// Consumes the body of a foreign-content element (kind taken from
+// parser->foreign_content_type) and appends the result to `children`.
+//
+// Token values are accumulated verbatim into a text buffer; ERB tags are parsed
+// as ERB nodes, with the buffered text handed to
+// parser_append_literal_node_from_buffer() before each one. The loop stops at
+// EOF, or at a close-tag start whose following identifier is accepted by
+// parser_is_expected_closing_tag_name(); in the latter case the close-tag start
+// is left as the current token for the caller. The parser always leaves
+// foreign-content mode before returning.
+//
+// NOTE: `errors` is not used by this function.
+static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors) {
+  buffer_T content = buffer_new();
+  position_T* start = position_copy(parser->current_token->location->start);
+  const char* expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
+  // No closing tag is known for this kind: leave foreign-content mode without
+  // consuming any tokens.
+  if (expected_closing_tag == NULL) {
+    parser_exit_foreign_content(parser);
+    position_free(start);
+    buffer_free(&content);
+    return;
+  }
+  while (!token_is(parser, TOKEN_EOF)) {
+    if (token_is(parser, TOKEN_ERB_START)) {
+      // Hand over the text gathered so far, then parse the ERB tag as its own node.
+      parser_append_literal_node_from_buffer(parser, &content, children, start);
+      AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
+      array_append(children, erb_node);
+      // The next run of text starts at the token following the ERB tag.
+      position_free(start);
+      start = position_copy(parser->current_token->location->start);
+      continue;
+    }
+    if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
+      // One-token lookahead: snapshot the lexer fields, read the next token
+      // straight from the lexer, then restore the snapshot so the peek is
+      // invisible to the rest of the parser.
+      size_t saved_position = parser->lexer->current_position;
+      size_t saved_line = parser->lexer->current_line;
+      size_t saved_column = parser->lexer->current_column;
+      size_t saved_previous_position = parser->lexer->previous_position;
+      size_t saved_previous_line = parser->lexer->previous_line;
+      size_t saved_previous_column = parser->lexer->previous_column;
+      char saved_char = parser->lexer->current_character;
+      lexer_state_T saved_state = parser->lexer->state;
+      token_T* next_token = lexer_next_token(parser->lexer);
+      bool is_potential_match = false;
+      if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
+        is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
+      }
+      parser->lexer->current_position = saved_position;
+      parser->lexer->current_line = saved_line;
+      parser->lexer->current_column = saved_column;
+      parser->lexer->previous_position = saved_previous_position;
+      parser->lexer->previous_line = saved_previous_line;
+      parser->lexer->previous_column = saved_previous_column;
+      parser->lexer->current_character = saved_char;
+      parser->lexer->state = saved_state;
+      if (next_token) { token_free(next_token); }
+      if (is_potential_match) {
+        // Closing tag found: hand over the pending text and return with the
+        // close-tag start still current.
+        parser_append_literal_node_from_buffer(parser, &content, children, start);
+        parser_exit_foreign_content(parser);
+        position_free(start);
+        buffer_free(&content);
+        return;
+      }
+    }
+    // Any other token (including a close-tag start that did not match) is
+    // plain content.
+    token_T* token = parser_advance(parser);
+    buffer_append(&content, token->value);
+    token_free(token);
+  }
+  // Reached EOF without a matching closing tag: hand over what was collected.
+  parser_append_literal_node_from_buffer(parser, &content, children, start);
+  parser_exit_foreign_content(parser);
+  position_free(start);
+  buffer_free(&content);
+}
 static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors) {
   while (token_is_none_of(parser, TOKEN_HTML_TAG_START_CLOSE, TOKEN_EOF)) {
     if (token_is(parser, TOKEN_ERB_START)) {
@@ -618,12 +832,14 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
     if (token_is_any_of(
           parser,
           TOKEN_AMPERSAND,
+          TOKEN_AT,
           TOKEN_CHARACTER,
           TOKEN_COLON,
           TOKEN_DASH,
           TOKEN_EQUALS,
           TOKEN_EXCLAMATION,
           TOKEN_IDENTIFIER,
+          TOKEN_NBSP,
           TOKEN_NEWLINE,
           TOKEN_PERCENT,
           TOKEN_QUOTE,
@@ -639,8 +855,8 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
     parser_append_unexpected_error(
       parser,
       "Unexpected token",
-      "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, or "
-      "TOKEN_NEWLINE",
+      "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
+      "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
       errors
     );
   }

data/src/parser_helpers.c CHANGED Viewed

@@ -8,6 +8,7 @@
 #include "include/lexer.h"
 #include "include/parser.h"
 #include "include/token.h"
+#include "include/token_matchers.h"
 #include <stdio.h>
 #include <strings.h>
@@ -54,6 +55,43 @@ bool parser_in_svg_context(const parser_T* parser) {
   return false;
 }
+// ===== Foreign Content Handling =====
+// Classifies a tag name as a foreign-content element. "script" and "style" are
+// recognised case-insensitively; NULL and every other name map to
+// FOREIGN_CONTENT_UNKNOWN.
+foreign_content_type_T parser_get_foreign_content_type(const char* tag_name) {
+  foreign_content_type_T detected = FOREIGN_CONTENT_UNKNOWN;
+  if (tag_name != NULL) {
+    if (strcasecmp(tag_name, "script") == 0) {
+      detected = FOREIGN_CONTENT_SCRIPT;
+    } else if (strcasecmp(tag_name, "style") == 0) {
+      detected = FOREIGN_CONTENT_STYLE;
+    }
+  }
+  return detected;
+}
+// Reports whether the tag name is classified as a foreign-content element,
+// i.e. parser_get_foreign_content_type() yields a known kind for it.
+bool parser_is_foreign_content_tag(const char* tag_name) {
+  const foreign_content_type_T detected = parser_get_foreign_content_type(tag_name);
+  return detected != FOREIGN_CONTENT_UNKNOWN;
+}
+// Returns the tag name that terminates foreign content of the given kind:
+// "script" or "style". Any other kind has no closing tag and yields NULL.
+const char* parser_get_foreign_content_closing_tag(foreign_content_type_T type) {
+  if (type == FOREIGN_CONTENT_SCRIPT) { return "script"; }
+  if (type == FOREIGN_CONTENT_STYLE) { return "style"; }
+  return NULL;
+}
+// Puts the parser into foreign-content mode for the given kind.
+// A NULL parser is ignored.
+void parser_enter_foreign_content(parser_T* parser, foreign_content_type_T type) {
+  if (parser != NULL) {
+    parser->foreign_content_type = type;
+    parser->state = PARSER_STATE_FOREIGN_CONTENT;
+  }
+}
+// Returns the parser to data mode and clears the recorded foreign-content kind.
+// A NULL parser is ignored.
+void parser_exit_foreign_content(parser_T* parser) {
+  if (parser != NULL) {
+    parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
+    parser->state = PARSER_STATE_DATA;
+  }
+}
 void parser_append_unexpected_error(parser_T* parser, const char* description, const char* expected, array_T* errors) {
   token_T* token = parser_advance(parser);
@@ -166,3 +204,11 @@ void parser_handle_mismatched_tags(
     );
   }
 }
+// Reports whether `tag_name` is the name of the tag that closes foreign
+// content of kind `expected_type`.
+//
+// Returns false when `tag_name` is NULL or when `expected_type` has no closing
+// tag (parser_get_foreign_content_closing_tag() returns NULL).
+bool parser_is_expected_closing_tag_name(const char* tag_name, foreign_content_type_T expected_type) {
+  const char* expected_tag_name = parser_get_foreign_content_closing_tag(expected_type);
+  if (expected_tag_name == NULL || tag_name == NULL) { return false; }
+  // HTML tag names are ASCII case-insensitive, and the opening tag is
+  // classified with strcasecmp(). A case-sensitive comparison here would let
+  // <SCRIPT> enter foreign content but never recognise </SCRIPT>, so the rest
+  // of the document would be swallowed as script text.
+  return strcasecmp(tag_name, expected_tag_name) == 0;
+}

data/src/token.c CHANGED Viewed

@@ -55,6 +55,7 @@ const char* token_type_to_string(const token_type_T type) {
     case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
     case TOKEN_EQUALS: return "TOKEN_EQUALS";
     case TOKEN_QUOTE: return "TOKEN_QUOTE";
+    case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
     case TOKEN_DASH: return "TOKEN_DASH";
     case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
     case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";

data/src/utf8.c ADDED Viewed

@@ -0,0 +1,46 @@
+#include "include/utf8.h"
+// Returns how many bytes the UTF-8 sequence introduced by `first_byte` claims
+// to occupy, judged solely by the lead byte's high bits:
+//   0xxxxxxx -> 1 (ASCII)
+//   110xxxxx -> 2
+//   1110xxxx -> 3
+//   11110xxx -> 4
+// Any other byte (a continuation byte or an invalid lead) is reported as 1.
+int utf8_char_byte_length(unsigned char first_byte) {
+  if ((first_byte >> 7) == 0x00) { return 1; }
+  if ((first_byte >> 5) == 0x06) { return 2; }
+  if ((first_byte >> 4) == 0x0E) { return 3; }
+  if ((first_byte >> 3) == 0x1E) { return 4; }
+  return 1;
+}
+// True when `byte` is a UTF-8 continuation byte, i.e. its two high bits are 10.
+bool utf8_is_valid_continuation_byte(unsigned char byte) {
+  return (byte >> 6) == 0x02;
+}
+// Measures the UTF-8 sequence that starts at str[position], where `max_length`
+// is the number of readable bytes in `str`.
+//
+// Returns 0 when `position` is at or beyond `max_length`. Returns 1 when the
+// lead byte announces a single byte, when the announced sequence would run past
+// `max_length`, or when any following byte is not a continuation byte.
+// Otherwise returns the announced length (2-4).
+int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
+  if (position >= max_length) { return 0; }
+  const int announced_length = utf8_char_byte_length((unsigned char) str[position]);
+  // Truncated sequence: fall back to treating the lead byte on its own.
+  if (position + announced_length > max_length) { return 1; }
+  for (int offset = 1; offset < announced_length; offset++) {
+    const unsigned char following = (unsigned char) str[position + offset];
+    // Malformed sequence: fall back to treating the lead byte on its own.
+    if (!utf8_is_valid_continuation_byte(following)) { return 1; }
+  }
+  return announced_length;
+}

data/src/visitor.c CHANGED Viewed

@@ -1,5 +1,5 @@
 // NOTE: This file is generated by the templates/template.rb script and should not
-// be modified manually. See /Users/marcoroth/Development/herb-release-6/templates/src/visitor.c.erb
+// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/visitor.c.erb
 #include <stdio.h>

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: herb
 version: !ruby/object:Gem::Version
-  version: 0.4.2
+  version: 0.5.0
 platform: arm-linux-gnu
 authors:
 - Marco Roth
 bindir: exe
 cert_chain: []
-date: 2025-07-28 00:00:00.000000000 Z
+date: 2025-08-17 00:00:00.000000000 Z
 dependencies: []
 description: Powerful and seamless HTML-aware ERB parsing and tooling.
 email:
@@ -125,6 +125,7 @@ files:
 - src/include/token.h
 - src/include/token_matchers.h
 - src/include/token_struct.h
+- src/include/utf8.h
 - src/include/util.h
 - src/include/version.h
 - src/include/visitor.h
@@ -144,6 +145,7 @@ files:
 - src/ruby_parser.c
 - src/token.c
 - src/token_matchers.c
+- src/utf8.c
 - src/util.c
 - src/visitor.c
 homepage: https://herb-tools.dev